"""Train a dense neural network on one-hot encoded station/year/month features.

Reads tab-separated inputs from ``train/``, ``dev-0/`` and ``test-A/``, fits a
fully connected regression network on the ``rainfall`` target from
``train/expected.tsv`` and writes one prediction per test row to
``test-A/out2.tsv``.

NOTE: earlier experiments that lived here as commented-out code were removed
for readability:
  * per-station aggregate features (mean/min/max of ``rainfall`` grouped by
    ``id_stacji`` + ``rok`` and by ``id_stacji`` + ``miesiąc``); that code
    wrote the "min" aggregate into the ``*_max`` columns and vice versa;
  * ``BatchNormalization`` layers between the hidden layers;
  * a functional-API model concatenating the input with the last hidden layer;
  * wrapping the model in ``KerasRegressor``.
"""

import numpy as np
import pandas as pd
import tensorflow.keras
import xgboost as xg
from keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor

# NOTE: np, xg, Dropout and KerasRegressor are not used by the active code
# below; the imports are kept from the original script.

# Column names of the header-less input files, in file order.
IN_COLUMNS = ["id_stacji", "nazwa_stacji", "typ_zbioru", "rok", "miesiąc"]
# Columns that are one-hot encoded and fed to the network.
CATEGORICAL_COLUMNS = ["id_stacji", "rok", "miesiąc"]
# Columns read from the files but never used as features.
UNUSED_COLUMNS = ["nazwa_stacji", "typ_zbioru"]
# Widths of the hidden ReLU layers, from the input side to the output side.
HIDDEN_UNITS = (2048, 1024, 512, 256, 128, 64, 32)
EPOCHS = 100


def read_inputs(path):
    """Read a header-less, tab-separated input file into a DataFrame."""
    return pd.read_csv(path, names=IN_COLUMNS, sep="\t")


def one_hot(frame):
    """Return the one-hot encoding of the categorical columns of *frame*.

    The unused text columns are dropped first, so the result contains only
    indicator columns. ``dtype`` is set explicitly because recent pandas
    versions produce boolean indicator columns by default.
    """
    features = frame.drop(columns=UNUSED_COLUMNS)
    return pd.get_dummies(features, columns=CATEGORICAL_COLUMNS, dtype="float32")


def build_model(n_features):
    """Build and compile the regression network for *n_features* inputs.

    The input width is taken from the encoded data instead of being
    hard-coded, so the model keeps working when the set of stations, years
    or months in the data changes.
    """
    first, *rest = HIDDEN_UNITS
    layers = [Dense(first, activation="relu", input_dim=n_features)]
    layers += [Dense(units, activation="relu") for units in rest]
    layers.append(Dense(1, activation="linear"))

    model = Sequential(layers)
    model.compile(
        loss="mean_squared_error",
        optimizer="adam",
        metrics=["mean_squared_error"],
    )
    return model


def main():
    """Fit the model on the training set and export predictions for test-A."""
    train = read_inputs("train/in.tsv")
    dev = read_inputs("dev-0/in.tsv")
    test = read_inputs("test-A/in.tsv")
    y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])

    # Encode train and dev together so the feature space also covers
    # categories that only occur in dev, then keep the training rows only.
    # Slicing by len(train) replaces the former hard-coded row count.
    x = one_hot(pd.concat([train, dev], ignore_index=True)).iloc[: len(train)]
    if len(x) != len(y):
        raise ValueError(
            f"train/in.tsv has {len(x)} rows but train/expected.tsv has {len(y)}"
        )

    model = build_model(x.shape[1])
    model.fit(x.to_numpy(), y.to_numpy(dtype="float32"), epochs=EPOCHS)

    # Force the test matrix onto exactly the training columns (same set, same
    # order): categories unseen in training are dropped, categories missing
    # from the test file become all-zero columns.
    x_test = one_hot(test).reindex(columns=x.columns, fill_value=0.0)
    pred = model.predict(x_test.to_numpy(dtype="float32"))

    pd.DataFrame(pred).to_csv(
        "test-A/out2.tsv", sep="\t", header=False, index=False
    )


if __name__ == "__main__":
    main()