111 lines
3.9 KiB
Python
111 lines
3.9 KiB
Python
import pandas as pd
|
|
|
|
import tensorflow.keras
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import xgboost as xg
|
|
import keras_tuner as kt
|
|
|
|
|
|
import tensorflow.keras
|
|
from sklearn.preprocessing import PolynomialFeatures
|
|
from tensorflow.keras.models import Sequential
|
|
from tensorflow.keras.layers import Dense
|
|
|
|
# Column names assigned to the headerless in.tsv files.
in_columns = ["id_stacji", "nazwa_stacji", "typ_zbioru", "rok", "miesiąc"]

df = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
df_test = pd.read_csv("dev-0/in.tsv", names=in_columns, sep="\t")

# Remember how many rows belong to the training split before the frames are
# stacked, so the slice below does not depend on a hard-coded dev-0 size.
n_train_rows = len(df)

# One-hot encode train and dev-0 together so that a category present in
# either split gets its own indicator column.
df = pd.concat([df, df_test])
x = pd.get_dummies(df, columns=["id_stacji", "rok", "miesiąc"])
x = x.drop(["nazwa_stacji", "typ_zbioru"], axis=1)

# The training rows come first in the concatenation; keep only those.
x = x.iloc[:n_train_rows]

# Regression target, one value per training row.
y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])
|
|
from keras_tuner import HyperModel
|
|
|
|
|
|
class ANNHyperModel(HyperModel):
    """Keras Tuner hypermodel for a fully connected regression network.

    The search space consists of the widths of six hidden ReLU layers and
    the learning rate of the Adam optimizer.
    """

    def build(self, hp):
        """Create and compile a model from sampled hyperparameters.

        Args:
            hp: Keras Tuner hyperparameter container used to sample values.

        Returns:
            A compiled ``Sequential`` model with a single linear output unit,
            trained and evaluated with mean squared error.
        """
        # (hyperparameter name, min width, max width); every width moves in
        # steps of 32.
        width_ranges = (
            ("units1", 100, 1024),
            ("units2", 32, 1024),
            ("units3", 32, 512),
            ("units4", 32, 512),
            ("units5", 32, 512),
            ("units6", 32, 512),
        )

        # Register all width hyperparameters up front, in declaration order,
        # before any layer is created.
        layer_widths = [
            hp.Int(name, min_value=low, max_value=high, step=32)
            for name, low, high in width_ranges
        ]

        network = tensorflow.keras.Sequential()
        for width in layer_widths:
            network.add(Dense(units=width, activation="relu"))

        # Single linear output for the regression target.
        network.add(Dense(1, kernel_initializer="normal", activation="linear"))

        # Learning rate is picked from 0.01, 0.001 or 0.0001.
        learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        optimizer = tensorflow.keras.optimizers.Adam(learning_rate=learning_rate)

        network.compile(
            optimizer=optimizer,
            loss="mean_squared_error",
            metrics=["mean_squared_error"],
        )
        return network
|
|
|
|
|
|
hypermodel = ANNHyperModel()

# Hyperband configuration: trials are ranked by the training
# mean_squared_error metric and results are stored under
# keras_tuner_dir/keras_tuner_demo2.
# NOTE(review): no validation data is passed to search(), so the objective is
# the training metric — confirm this is intended.
hyperband_settings = {
    "objective": "mean_squared_error",
    "max_epochs": 100,
    "factor": 3,
    "directory": "keras_tuner_dir",
    "project_name": "keras_tuner_demo2",
}
tuner = kt.Hyperband(hypermodel, **hyperband_settings)

# Run the hyperparameter search on the one-hot encoded training data.
tuner.search(x, y, epochs=100)
|
|
|
|
# Report every tuned layer width (units1..units6, as registered in
# ANNHyperModel.build) together with the learning rate. The best
# hyperparameter set is fetched once instead of on every iteration.
best_hps = tuner.get_best_hyperparameters()[0]
for h_param in [f"units{i}" for i in range(1, 7)] + ["learning_rate"]:
    print(h_param, best_hps.get(h_param))
|
|
|
|
# Take the top-ranked model from the search, print its architecture and
# continue training it on the full training set.
best_model = tuner.get_best_models(num_models=1)[0]
best_model.build(x.shape)
best_model.summary()

best_model.fit(x, y, epochs=100, batch_size=64)
|
|
x_test = pd.read_csv("test-A/in.tsv", sep="\t", names=in_columns)
df_train = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")

# Remember the number of test rows before stacking, so the slice below does
# not depend on a hard-coded training-set size.
n_test_rows = len(x_test)

# Encode test rows together with the training rows so that categories seen
# in training get indicator columns even when absent from test-A.
x_test = pd.concat([x_test, df_train])
x_test = x_test.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
x_test = pd.get_dummies(x_test, columns=["id_stacji", "rok", "miesiąc"])

# The test rows come first in the concatenation; keep only those.
x_test = x_test.iloc[:n_test_rows]

# The model was fitted on the columns of `x` (train + dev-0 categories).
# Align the test matrix to exactly those columns, in the same order: columns
# missing here are filled with 0 and categories unknown to the model are
# dropped. The float cast keeps a single numeric dtype after the fill.
x_test = x_test.reindex(columns=x.columns, fill_value=0).astype("float32")

pred = best_model.predict(x_test)
out = pd.DataFrame(pred)
out.to_csv("test-A/out.tsv", sep="\t", header=False, index=False)
|
|
|