138 lines
4.5 KiB
Python
138 lines
4.5 KiB
Python
import numpy as np
|
|
import pandas as pd
|
|
import xgboost as xg
|
|
from sklearn.compose import TransformedTargetRegressor
|
|
from sklearn.model_selection import GridSearchCV
|
|
from sklearn.preprocessing import (
|
|
QuantileTransformer,
|
|
StandardScaler,
|
|
PolynomialFeatures,
|
|
)
|
|
|
|
import tensorflow.keras
|
|
from tensorflow.keras.models import Sequential
|
|
from tensorflow.keras.layers import Dense
|
|
|
|
def _read_tsv(path):
    """Read a headerless, tab-separated file into a DataFrame."""
    return pd.read_csv(path, header=None, sep="\t")


# Inputs and expected values for each split. Columns get integer labels
# because the files are read without a header row.
train = _read_tsv("train/in.tsv")
train_expected = _read_tsv("train/expected.tsv")
dev_expected = _read_tsv("dev-0/expected.tsv")
dev_0 = _read_tsv("dev-0/in.tsv")
test_A = _read_tsv("test-A/in.tsv")

# Degree-2, interaction-only feature expansion; it is not referenced again
# in the visible part of the script.
poly = PolynomialFeatures(2, interaction_only=True)
|
|
|
|
|
|
def preprocess_data(df_to_process, main_df=None):
    """One-hot encode the categorical columns and align to a reference frame.

    Columns ``0``, ``3`` and ``4`` are expanded with ``pd.get_dummies`` and
    columns ``1`` and ``2`` are dropped.

    Args:
        df_to_process: Raw feature frame with integer column labels.
        main_df: Optional, already processed reference frame. When it is a
            DataFrame, the result is reindexed to exactly its columns, in
            the same order: indicator columns missing from
            ``df_to_process`` are added and filled with 0, and indicator
            columns that do not exist in ``main_df`` are discarded.

    Returns:
        A new DataFrame; ``df_to_process`` itself is not modified.
    """
    encoded = pd.get_dummies(df_to_process, columns=[0, 3, 4])
    encoded = encoded.drop(columns=[1, 2])

    if isinstance(main_df, pd.DataFrame):
        # Appending only the missing columns would leave them at the end and
        # keep unseen categories, giving a matrix whose column order/width
        # differs from the reference. Reindexing fixes both at once.
        encoded = encoded.reindex(columns=main_df.columns, fill_value=0)

    return encoded
|
|
|
|
|
|
# The training features are processed first; their columns are then passed
# as the reference frame when processing the dev and test splits.
# (Fitting on train and dev-0 concatenated is left disabled.)
x_train = preprocess_data(train)
dev_train, test_A_df = (
    preprocess_data(split, x_train) for split in (dev_0, test_A)
)

# Regression target: column 0 of the expected-values file.
y = train_expected[0]
|
|
|
|
# Define model.
# Five ReLU Dense layers (512 -> 256 -> 128 -> 64 -> 32), each followed by
# batch normalisation, then a single linear output unit.
# A smaller commented-out variant (Dense(20, input_dim=76) -> Dense(1)) used
# to sit here.

# The number of input features depends on how many indicator columns the
# one-hot encoding produced, so it is read from the training frame instead
# of being hard-coded.
n_features = x_train.shape[1]

model = Sequential(
    [
        Dense(512, activation="relu", input_dim=n_features),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 2, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 4, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 8, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(32, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(1),
    ]
)
|
|
# Two commented-out alternatives used to sit here: a 128/64/32/16/32/1 Dense
# stack without batch normalisation (input_dim=73) and a 16/32/64/32/1 stack
# (input_dim=97).

model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mean_squared_error"],
)

# Convert the feature frame and the target to float32 NumPy arrays before
# handing them to Keras.
f_train, y = (np.asarray(data).astype(np.float32) for data in (x_train, y))

model.fit(f_train, y, epochs=100)
|
|
|
|
|
|
# A commented-out XGBoost alternative used to sit here: GridSearchCV over
# XGBRFRegressor (n_estimators, max_depth, reg_lambda) wrapped in a
# TransformedTargetRegressor with a normal-output QuantileTransformer.
# Predictions now come from the Keras model.


def _as_float32(frame):
    """Return *frame* as a float32 NumPy array, the same form used for fitting."""
    return np.asarray(frame).astype(np.float32)


# Feed the model the same kind of input it was fitted on (float32 arrays)
# instead of raw DataFrames, whose columns may have mixed dtypes.
dev0_predicted = model.predict(_as_float32(dev_train))
test_A_predicted = model.predict(_as_float32(test_A_df))

# Predictions are written unrounded, without header row or index column.
pd.DataFrame(dev0_predicted).to_csv("dev-0/out.tsv", header=False, index=False)
pd.DataFrame(test_A_predicted).to_csv("test-A/out.tsv", header=False, index=False)
|