precipitation-pl/run5.py
2022-05-23 18:33:53 +02:00

95 lines
4.3 KiB
Python

import pandas as pd
# import tensorflow.keras
import numpy as np
import pandas as pd
import xgboost as xg
import tensorflow.keras
from keras.layers import Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
# Column names applied to every in.tsv file; the files are read as headerless
# because explicit `names` are supplied.
in_columns = ["id_stacji", "nazwa_stacji", "typ_zbioru", "rok", "miesiąc"]

# Training inputs: the train split followed by the dev-0 split.
# train/in.tsv is read once and reused below instead of being parsed twice.
df_train = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
df_test = pd.read_csv("dev-0/in.tsv", names=in_columns, sep="\t")
# ignore_index gives the combined frame a unique 0..n-1 index; without it the
# dev-0 rows reuse the labels 0..m-1 of the train rows.
df = pd.concat([df_train, df_test], ignore_index=True)

# Targets, concatenated in the same order as the inputs (train, then dev-0).
y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])
y2 = pd.read_csv("dev-0/expected.tsv", sep="\t", names=["rainfall"])
y = pd.concat([y, y2], ignore_index=True)

# Attach the target only after both frames share the same unique index, so
# every row (dev-0 included) receives its own label rather than a label
# index-aligned from the train split.
df["rainfall"] = y["rainfall"]

# Test inputs with the training rows appended at the end. The appended rows
# are removed again after one-hot encoding further down in the script
# (presumably so the encoder also sees the training categories).
x_test = pd.read_csv("test-A/in.tsv", sep="\t", names=in_columns)
x_test = pd.concat([x_test, df_train], ignore_index=True)
# grouped_multiple_years = df.groupby(['id_stacji', 'rok']).agg({'rainfall': ['mean', 'min', 'max']})
# grouped_multiple_months = df.groupby(['id_stacji', 'miesiąc']).agg({'rainfall': ['mean', 'min', 'max']})
# flat = grouped_multiple_years.reset_index()
# for index, row in flat.iterrows():
# df[f"{row['id_stacji'].values[0]}_{row['rok'].values[0]}_mean"] = row["rainfall"]["mean"]
# df[f"{row['id_stacji'].values[0]}_{row['rok'].values[0]}_max"] = row["rainfall"]["min"]
# df[f"{row['id_stacji'].values[0]}_{row['rok'].values[0]}_min"] = row["rainfall"]["max"]
# x_test[f"{row['id_stacji'].values[0]}_{row['rok'].values[0]}_mean"] = row["rainfall"]["mean"]
# x_test[f"{row['id_stacji'].values[0]}_{row['rok'].values[0]}_max"] = row["rainfall"]["min"]
# x_test[f"{row['id_stacji'].values[0]}_{row['rok'].values[0]}_min"] = row["rainfall"]["max"]
#
# # flat2 = grouped_multiple_months.reset_index()
# # for index, row in flat2.iterrows():
# # df[f"{row['id_stacji'].values[0]}_{row['miesiąc'].values[0]}_mean"] = row["rainfall"]["mean"]
# # df[f"{row['id_stacji'].values[0]}_{row['miesiąc'].values[0]}_max"] = row["rainfall"]["min"]
# # df[f"{row['id_stacji'].values[0]}_{row['miesiąc'].values[0]}_min"] = row["rainfall"]["max"]
# # x_test[f"{row['id_stacji'].values[0]}_{row['miesiąc'].values[0]}_mean"] = row["rainfall"]["mean"]
# # x_test[f"{row['id_stacji'].values[0]}_{row['miesiąc'].values[0]}_max"] = row["rainfall"]["min"]
# x_test[f"{row['id_stacji'].values[0]}_{row['miesiąc'].values[0]}_min"] = row["rainfall"]["max"]
# Build the training matrix: discard the columns that are not used as
# features (station name, dataset type, target), then one-hot encode the
# station id, year and month. Only the dummy columns remain in `x`.
x = pd.get_dummies(
    df.drop(columns=["nazwa_stacji", "typ_zbioru", "rainfall"]),
    columns=["id_stacji", "rok", "miesiąc"],
)
# Fully connected regression network: eight ReLU hidden layers that halve in
# width from 2048 down to 16, followed by a single linear output unit.
# The input width is taken from the encoded training matrix instead of a
# hard-coded constant, so the model keeps matching `x` when the set of
# one-hot columns changes.
model = Sequential(
    [
        Dense(2048, activation="relu", input_dim=x.shape[1]),
        Dense(1024, activation="relu"),
        Dense(512, activation="relu"),
        # tensorflow.keras.layers.BatchNormalization(),
        Dense(256, activation="relu"),
        # tensorflow.keras.layers.BatchNormalization(),
        Dense(128, activation="relu"),
        # tensorflow.keras.layers.BatchNormalization(),
        Dense(64, activation="relu"),
        # tensorflow.keras.layers.BatchNormalization(),
        Dense(32, activation="relu"),
        Dense(16, activation="relu"),
        # tensorflow.keras.layers.BatchNormalization(),
        Dense(1, activation="linear"),
    ]
)
# input = tensorflow.keras.layers.Input(shape=x.shape[1:])
# hidden1 = tensorflow.keras.layers.Dense(1024, activation='relu')(input)
# hidden2 = tensorflow.keras.layers.Dense(512, activation='relu')(hidden1)
# hidden3 = tensorflow.keras.layers.Dense(256, activation='relu')(hidden2)
# hidden4 = tensorflow.keras.layers.Dense(128, activation='relu')(hidden3)
# concat = tensorflow.keras.layers.Concatenate()([input, hidden4])
# output = tensorflow.keras.layers.Dense(1, activation="linear")(concat)
# model = tensorflow.keras.models.Model(inputs=[input], outputs=[output])
# Optimise mean squared error with Adam; the same quantity is also reported
# as a metric while training.
model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mean_squared_error"],
)
# Disabled alternative: train through the scikit-learn wrapper.
# estimator = KerasRegressor(build_fn=model, epochs=100, batch_size=10, verbose=0)
# estimator.fit(x, y)
# Train directly on the encoded features and targets for 100 epochs.
model.fit(x=x, y=y, epochs=100)
# Encode the test inputs the same way as the training inputs: drop the
# unused text columns, then one-hot encode station id, year and month.
x_test = x_test.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
x_test = pd.get_dummies(x_test, columns=["id_stacji", "rok", "miesiąc"])

# x_test was built as test-A rows followed by the rows of df_train; strip the
# appended training rows by their actual count instead of a fixed number.
x_test = x_test.iloc[: len(x_test) - len(df_train)]

# Align the test columns with the training matrix. Categories the model was
# trained on but that are absent here become all-zero columns; categories the
# model never saw are dropped. This also fixes the column order, so each
# input position means the same feature as during training.
x_test = x_test.reindex(columns=x.columns, fill_value=0)

pred = model.predict(x_test)
# pred = estimator.predict(x_test)

# Write one prediction per line, without header or index.
out = pd.DataFrame(pred)
out.to_csv("test-A/out.tsv", sep="\t", header=False, index=False)