second try
This commit is contained in:
parent
c4e16ee555
commit
148eb732ae
1186
dev-0/out.tsv
1186
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
104
run.py
104
run.py
@ -5,6 +5,10 @@ from sklearn.compose import TransformedTargetRegressor
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.preprocessing import QuantileTransformer, StandardScaler
|
||||
|
||||
import tensorflow.keras
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.layers import Dense
|
||||
|
||||
train = pd.read_csv("train/in.tsv", header=None, sep="\t")
|
||||
train_expected = pd.read_csv("train/expected.tsv", header=None, sep="\t")
|
||||
dev_0 = pd.read_csv("dev-0/in.tsv", header=None, sep="\t")
|
||||
@ -12,11 +16,11 @@ test_A = pd.read_csv("test-A/in.tsv", header=None, sep="\t")
|
||||
|
||||
|
||||
def preprocess_data(df_to_process, main_df=None):
|
||||
final_df = pd.get_dummies(df_to_process, columns=[1, 2])
|
||||
final_df.drop(columns=[0], inplace=True)
|
||||
numeric = [3, 4]
|
||||
sc = StandardScaler()
|
||||
final_df[numeric] = sc.fit_transform(final_df[numeric])
|
||||
final_df = pd.get_dummies(df_to_process, columns=[0, 1, 2, 3, 4])
|
||||
# final_df.drop(columns=[0], inplace=True)
|
||||
# numeric = [3]
|
||||
# sc = StandardScaler()
|
||||
# final_df[numeric] = sc.fit_transform(final_df[numeric])
|
||||
|
||||
if type(main_df) == pd.DataFrame:
|
||||
final_columns = [
|
||||
@ -30,44 +34,74 @@ def preprocess_data(df_to_process, main_df=None):
|
||||
|
||||
return final_df
|
||||
|
||||
|
||||
f_train = pd.concat([train, dev_0])
|
||||
train_df = preprocess_data(train)
|
||||
dev_df = preprocess_data(dev_0, train_df)
|
||||
# dev_df = preprocess_data(dev_0, train_df)
|
||||
test_A_df = preprocess_data(test_A, train_df)
|
||||
y = train_expected[0]
|
||||
|
||||
param_grid = {
|
||||
"n_estimators": [100, 80, 60, 55, 51, 45],
|
||||
"max_depth": [7, 8],
|
||||
"reg_lambda": [0.26, 0.25, 0.2],
|
||||
}
|
||||
|
||||
grid = GridSearchCV(
|
||||
xg.XGBRFRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1
|
||||
) #
|
||||
regr_trans = TransformedTargetRegressor(
|
||||
regressor=grid, transformer=QuantileTransformer(output_distribution="normal")
|
||||
# Define model
|
||||
# model = Sequential()
|
||||
model = tensorflow.keras.models.Sequential(
|
||||
[
|
||||
tensorflow.keras.layers.Dense(
|
||||
128, activation="relu", input_dim=97, kernel_regularizer="l2"
|
||||
),
|
||||
tensorflow.keras.layers.Dropout(0.5),
|
||||
tensorflow.keras.layers.Dense(128 // 2, activation="relu", kernel_regularizer="l2"),
|
||||
tensorflow.keras.layers.Dense(128 // 4, activation="relu", kernel_regularizer="l2"),
|
||||
tensorflow.keras.layers.Dense(128 // 8, activation="relu", kernel_regularizer="l2"),
|
||||
tensorflow.keras.layers.Dense(32, activation="relu", kernel_regularizer="l2"),
|
||||
tensorflow.keras.layers.Dense(1),
|
||||
]
|
||||
)
|
||||
# model.add(Dense(16, input_dim=97, activation= "relu"))
|
||||
# model.add(Dense(32, activation= "relu"))
|
||||
# model.add(Dense(64, activation= "relu"))
|
||||
# model.add(Dense(32, activation= "relu"))
|
||||
# model.add(Dense(1))
|
||||
#model.summary() #Print model Summary
|
||||
|
||||
# fitting the model for grid search
|
||||
grid_result = regr_trans.fit(train_df, y)
|
||||
best_params = grid_result.regressor_.best_params_
|
||||
model.compile(loss= "mean_squared_error" , optimizer="adam", metrics=["mean_squared_error"])
|
||||
model.fit(train_df, y, epochs=20)
|
||||
|
||||
# using best params to create and fit model
|
||||
best_model = xg.XGBRFRegressor(
|
||||
max_depth=best_params["max_depth"],
|
||||
n_estimators=best_params["n_estimators"],
|
||||
reg_lambda=best_params["reg_lambda"],
|
||||
)
|
||||
regr_trans = TransformedTargetRegressor(
|
||||
regressor=best_model, transformer=QuantileTransformer(output_distribution="normal")
|
||||
)
|
||||
|
||||
regr_trans.fit(train_df, y)
|
||||
dev0_predicted = regr_trans.predict(dev_df)
|
||||
test_A_predicted = regr_trans.predict(test_A_df)
|
||||
dev0_predicted = np.round(dev0_predicted, decimals=1)
|
||||
# param_grid = {
|
||||
# "n_estimators": [110, 100, 80, 60, 55, 51, 45, 35, 25],
|
||||
# "max_depth": [5, 6, 7, 8, 9, 10, 11],
|
||||
# "reg_lambda": [0.26, 0.25, 0.2, 0.15, 0.10],
|
||||
# }
|
||||
#
|
||||
# grid = GridSearchCV(
|
||||
# xg.XGBRFRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1
|
||||
# ) #
|
||||
# regr_trans = TransformedTargetRegressor(
|
||||
# regressor=grid, transformer=QuantileTransformer(output_distribution="normal")
|
||||
# )
|
||||
#
|
||||
# # fitting the model for grid search
|
||||
# grid_result = regr_trans.fit(train_df, y)
|
||||
# best_params = grid_result.regressor_.best_params_
|
||||
#
|
||||
# # using best params to create and fit model
|
||||
# best_model = xg.XGBRFRegressor(
|
||||
# max_depth=best_params["max_depth"],
|
||||
# n_estimators=best_params["n_estimators"],
|
||||
# reg_lambda=best_params["reg_lambda"],
|
||||
# )
|
||||
# regr_trans = TransformedTargetRegressor(
|
||||
# regressor=best_model, transformer=QuantileTransformer(output_distribution="normal")
|
||||
# )
|
||||
|
||||
# regr_trans.fit(train_df, y)
|
||||
# dev0_predicted = regr_trans.predict(dev_df)
|
||||
# test_A_predicted = regr_trans.predict(test_A_df)
|
||||
|
||||
# dev0_predicted = model.predict(dev_df)
|
||||
test_A_predicted = model.predict(test_A_df)
|
||||
|
||||
# dev0_predicted = np.round(dev0_predicted, decimals=1)
|
||||
test_A_predicted = np.round(test_A_predicted, decimals=1)
|
||||
|
||||
pd.DataFrame(dev0_predicted).to_csv("dev-0/out.tsv", header=None, index=None)
|
||||
# pd.DataFrame(dev0_predicted).to_csv("dev-0/out.tsv", header=None, index=None)
|
||||
pd.DataFrame(test_A_predicted).to_csv("test-A/out.tsv", header=None, index=None)
|
||||
|
1416
test-A/out.tsv
1416
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user