# precipitation-pl/run.py

import numpy as np
import pandas as pd
import xgboost as xg
from sklearn.compose import TransformedTargetRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import QuantileTransformer, StandardScaler

import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def _read_tsv(path):
    """Read a headerless, tab-separated file into a DataFrame."""
    return pd.read_csv(path, header=None, sep="\t")


# Feature files ("in.tsv") and target files ("expected.tsv") for each split.
# With header=None the columns are labelled 0, 1, 2, ...
train = _read_tsv("train/in.tsv")
train_expected = _read_tsv("train/expected.tsv")
dev_expected = _read_tsv("dev-0/expected.tsv")
dev_0 = _read_tsv("dev-0/in.tsv")
test_A = _read_tsv("test-A/in.tsv")


def preprocess_data(df_to_process, main_df=None):
    """One-hot encode the categorical columns and drop the unused ones.

    Columns 1, 3 and 4 are expanded with ``pd.get_dummies``; columns 0 and 2
    are dropped. All other columns are passed through unchanged.

    Args:
        df_to_process: Raw frame whose columns carry the integer labels
            0, 1, 2, ... (as produced by ``read_csv(..., header=None)``).
        main_df: Optional, already processed reference frame. When it is a
            DataFrame, the result is reindexed to exactly its columns, in its
            order: reference columns missing here are added and filled with
            0, and dummy columns that the reference does not have are
            dropped. Any other value (e.g. ``None``) skips the alignment.

    Returns:
        A new DataFrame; ``df_to_process`` itself is not modified.

    Raises:
        KeyError: If any of the columns 0-4 is absent from ``df_to_process``.
    """
    encoded = pd.get_dummies(df_to_process, columns=[1, 3, 4])
    encoded = encoded.drop(columns=[0, 2])

    if isinstance(main_df, pd.DataFrame):
        # A model consumes features by position, so the frame must have the
        # same columns in the same order as the reference. Merely appending
        # the missing columns would leave them in the wrong positions and
        # keep categories the reference has never seen.
        encoded = encoded.reindex(columns=main_df.columns, fill_value=0)

    return encoded

# Train and dev-0 are stacked into a single training set, so no separate
# dev frame is prepared.
f_train = preprocess_data(pd.concat([train, dev_0]))

# The processed training frame is handed over as the column reference for
# the test features.
test_A_df = preprocess_data(test_A, main_df=f_train)

# Targets are stacked in the same train-then-dev order as the features;
# column 0 of the expected files holds the value to predict.
y_expected = pd.concat([train_expected, dev_expected])
y = y_expected[0]

# Convert features and targets to plain float32 arrays before building the
# network, so the input width can be read from the data.
f_train = np.asarray(f_train).astype(np.float32)
y = np.asarray(y).astype(np.float32)

# Fully connected regression network: five ReLU hidden layers followed by a
# single linear output unit. The input width follows the number of encoded
# feature columns instead of a hard-coded constant, which would break as soon
# as the one-hot encoding yields a different column count.
model = Sequential(
    [
        Dense(128, activation="relu", input_dim=f_train.shape[1]),
        Dense(128 // 2, activation="relu"),
        Dense(128 // 4, activation="relu"),
        Dense(128 // 8, activation="relu"),
        Dense(32, activation="relu"),
        Dense(1),
    ]
)

# Mean squared error is used both as the training loss and as the reported
# metric.
model.compile(
    loss="mean_squared_error",
    optimizer="adam",
    metrics=["mean_squared_error"],
)
model.fit(f_train, y, epochs=100)


# param_grid = {
#     "n_estimators": [110, 100, 80, 60, 55, 51, 45, 35, 25],
#     "max_depth": [5, 6, 7, 8, 9, 10, 11],
#     "reg_lambda": [0.26, 0.25, 0.2, 0.15, 0.10],
# }
#
# grid = GridSearchCV(
#     xg.XGBRFRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1
# )  #
# regr_trans = TransformedTargetRegressor(
#     regressor=grid, transformer=QuantileTransformer(output_distribution="normal")
# )
#
# # fitting the model for grid search
# grid_result = regr_trans.fit(train_df, y)
# best_params = grid_result.regressor_.best_params_
#
# # using best params to create and fit model
# best_model = xg.XGBRFRegressor(
#     max_depth=best_params["max_depth"],
#     n_estimators=best_params["n_estimators"],
#     reg_lambda=best_params["reg_lambda"],
# )
# regr_trans = TransformedTargetRegressor(
#     regressor=best_model, transformer=QuantileTransformer(output_distribution="normal")
# )

# regr_trans.fit(train_df, y)
# dev0_predicted = regr_trans.predict(dev_df)
# test_A_predicted = regr_trans.predict(test_A_df)

# No dev-0 predictions are produced: dev-0 is part of the training set.

# Feed the network the same kind of input it was trained on (a float32
# ndarray). The encoded test frame can mix dummy columns with integer
# zero-filled columns, which is not a uniform numeric input.
test_A_features = np.asarray(test_A_df).astype(np.float32)
test_A_predicted = model.predict(test_A_features)

# Predictions are written with one decimal place, one value per line.
test_A_predicted = np.round(test_A_predicted, decimals=1)

pd.DataFrame(test_A_predicted).to_csv("test-A/out.tsv", header=None, index=None)