"""Train a dense Keras regressor on tab-separated data and write predictions.

The script reads ``train/in.tsv`` and ``train/expected.tsv``, one-hot encodes
the categorical columns, fits a feed-forward network, and writes predictions
for ``dev-0/in.tsv`` and ``test-A/in.tsv`` to the matching ``out.tsv`` files.
"""
import numpy as np
import pandas as pd
import xgboost as xg
from sklearn.compose import TransformedTargetRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import (
    QuantileTransformer,
    StandardScaler,
    PolynomialFeatures,
)
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Input files have no header row, so columns are addressed by integer position.
train = pd.read_csv("train/in.tsv", header=None, sep="\t")
train_expected = pd.read_csv("train/expected.tsv", header=None, sep="\t")
# NOTE: dev_expected is loaded but not used anywhere in this script.
dev_expected = pd.read_csv("dev-0/expected.tsv", header=None, sep="\t")
dev_0 = pd.read_csv("dev-0/in.tsv", header=None, sep="\t")
test_A = pd.read_csv("test-A/in.tsv", header=None, sep="\t")

# NOTE: poly is constructed but never applied to the features below.
poly = PolynomialFeatures(2, interaction_only=True)

# Widths of the hidden Dense layers; each one is followed by BatchNormalization.
HIDDEN_UNITS = (512, 512 // 2, 512 // 4, 512 // 8, 32)


def preprocess_data(df_to_process, main_df=None):
    """One-hot encode columns 0, 3 and 4 and drop columns 1 and 2.

    Args:
        df_to_process: Raw input frame with integer column labels.
        main_df: Optional, already-preprocessed reference frame (the training
            features). When given, the result is aligned to its columns:
            columns missing from the result are added filled with 0, columns
            unknown to ``main_df`` are dropped, and the column order follows
            ``main_df`` so that feature positions match the training matrix.

    Returns:
        A new DataFrame with the encoded features; ``df_to_process`` itself is
        not modified.
    """
    final_df = pd.get_dummies(df_to_process, columns=[0, 3, 4])
    final_df = final_df.drop(columns=[1, 2])
    if isinstance(main_df, pd.DataFrame):
        # Appending missing columns at the end would leave the features in a
        # different order (and possibly a different width) than the training
        # matrix; reindexing guarantees identical layout.
        final_df = final_df.reindex(columns=main_df.columns, fill_value=0)
    return final_df


# f_train = pd.concat([train, dev_0])
x_train = preprocess_data(train)
dev_train = preprocess_data(dev_0, x_train)
test_A_df = preprocess_data(test_A, x_train)
y = train_expected[0]

# Earlier, disabled model variants kept for reference:
# Define model
# model = Sequential()
# model = Sequential()
# model.add(Dense(20, input_dim=76, kernel_initializer='normal', activation='relu'))
# model.add(Dense(1, kernel_initializer='normal'))
# Compile model
# model.compile(loss='mean_squared_error', optimizer='adam')
# evaluate model with stand

# Derive the input width from the data instead of hard-coding it, so the model
# keeps working when the set of one-hot categories changes.
n_features = x_train.shape[1]

layers = []
for position, units in enumerate(HIDDEN_UNITS):
    if position == 0:
        layers.append(Dense(units, activation="relu", input_dim=n_features))
    else:
        layers.append(Dense(units, activation="relu"))
    layers.append(tensorflow.keras.layers.BatchNormalization())
layers.append(Dense(1))  # single linear output for regression
model = Sequential(layers)

# model = tensorflow.keras.models.Sequential(
#     [
#         tensorflow.keras.layers.Dense(
#             128, activation="relu", input_dim=73,
#         ),
#         tensorflow.keras.layers.Dense(128 // 2, activation="relu"),
#         tensorflow.keras.layers.Dense(128 // 4, activation="relu"),
#         tensorflow.keras.layers.Dense(128 // 8, activation="relu"),
#         tensorflow.keras.layers.Dense(32, activation="relu"),
#         tensorflow.keras.layers.Dense(1),
#     ]
# )
# model.add(Dense(16, input_dim=97, activation= "relu"))
# model.add(Dense(32, activation= "relu"))
# model.add(Dense(64, activation= "relu"))
# model.add(Dense(32, activation= "relu"))
# model.add(Dense(1))
# model.summary() #Print model Summary

model.compile(
    loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
)

# Keras is fed plain float32 arrays; get_dummies output may be bool/uint8.
f_train = np.asarray(x_train).astype(np.float32)
y = np.asarray(y).astype(np.float32)
model.fit(f_train, y, epochs=100)

# Disabled alternative: XGBoost random forest with grid search.
# NOTE(review): this block refers to train_df / dev_df, which are not defined
# in this script — rename to x_train / dev_train before re-enabling.
# param_grid = {
#     "n_estimators": [110, 100, 80, 60, 55, 51, 45, 35, 25],
#     "max_depth": [5, 6, 7, 8, 9, 10, 11],
#     "reg_lambda": [0.26, 0.25, 0.2, 0.15, 0.10],
# }
#
# grid = GridSearchCV(
#     xg.XGBRFRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1
# )
#
# regr_trans = TransformedTargetRegressor(
#     regressor=grid, transformer=QuantileTransformer(output_distribution="normal")
# )
#
# # fitting the model for grid search
# grid_result = regr_trans.fit(train_df, y)
# best_params = grid_result.regressor_.best_params_
#
# # using best params to create and fit model
# best_model = xg.XGBRFRegressor(
#     max_depth=best_params["max_depth"],
#     n_estimators=best_params["n_estimators"],
#     reg_lambda=best_params["reg_lambda"],
# )
# regr_trans = TransformedTargetRegressor(
#     regressor=best_model, transformer=QuantileTransformer(output_distribution="normal")
# )
# regr_trans.fit(train_df, y)
# dev0_predicted = regr_trans.predict(dev_df)
# test_A_predicted = regr_trans.predict(test_A_df)

# Convert the evaluation features exactly like the training features so the
# network sees the same dtype and layout it was fitted on.
dev_features = np.asarray(dev_train).astype(np.float32)
test_A_features = np.asarray(test_A_df).astype(np.float32)
dev0_predicted = model.predict(dev_features)
test_A_predicted = model.predict(test_A_features)

# dev0_predicted = np.round(dev0_predicted, decimals=1)
# test_A_predicted = np.round(test_A_predicted, decimals=1)

# One prediction per line, without header or index.
pd.DataFrame(dev0_predicted).to_csv("dev-0/out.tsv", header=None, index=None)
pd.DataFrame(test_A_predicted).to_csv("test-A/out.tsv", header=None, index=None)