xx
This commit is contained in:
parent
95b383a1b4
commit
566029c717
76
run.py
76
run.py
@ -3,7 +3,11 @@ import pandas as pd
|
||||
import xgboost as xg
|
||||
from sklearn.compose import TransformedTargetRegressor
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.preprocessing import QuantileTransformer, StandardScaler
|
||||
from sklearn.preprocessing import (
|
||||
QuantileTransformer,
|
||||
StandardScaler,
|
||||
PolynomialFeatures,
|
||||
)
|
||||
|
||||
import tensorflow.keras
|
||||
from tensorflow.keras.models import Sequential
|
||||
@ -15,14 +19,12 @@ dev_expected = pd.read_csv("dev-0/expected.tsv", header=None, sep="\t")
|
||||
dev_0 = pd.read_csv("dev-0/in.tsv", header=None, sep="\t")
|
||||
test_A = pd.read_csv("test-A/in.tsv", header=None, sep="\t")
|
||||
|
||||
poly = PolynomialFeatures(2, interaction_only=True)
|
||||
|
||||
|
||||
def preprocess_data(df_to_process, main_df=None):
|
||||
final_df = pd.get_dummies(df_to_process, columns=[1, 3, 4])
|
||||
final_df.drop(columns=[0], inplace=True)
|
||||
final_df.drop(columns=[2], inplace=True)
|
||||
# numeric = [3]
|
||||
# sc = StandardScaler()
|
||||
# final_df[numeric] = sc.fit_transform(final_df[numeric])
|
||||
final_df = pd.get_dummies(df_to_process, columns=[0, 3, 4])
|
||||
final_df.drop(columns=[1, 2], inplace=True)
|
||||
|
||||
if type(main_df) == pd.DataFrame:
|
||||
final_columns = [
|
||||
@ -36,12 +38,13 @@ def preprocess_data(df_to_process, main_df=None):
|
||||
|
||||
return final_df
|
||||
|
||||
f_train = pd.concat([train, dev_0])
|
||||
f_train = preprocess_data(f_train)
|
||||
# dev_df = preprocess_data(dev_0, train_df)
|
||||
test_A_df = preprocess_data(test_A, f_train)
|
||||
y_expected = pd.concat([train_expected, dev_expected])
|
||||
y = y_expected[0]
|
||||
|
||||
# f_train = pd.concat([train, dev_0])
|
||||
x_train = preprocess_data(train)
|
||||
dev_train = preprocess_data(dev_0, x_train)
|
||||
|
||||
test_A_df = preprocess_data(test_A, x_train)
|
||||
y = train_expected[0]
|
||||
|
||||
# Define model
|
||||
# model = Sequential()
|
||||
@ -51,27 +54,44 @@ y = y_expected[0]
|
||||
# Compile model
|
||||
# model.compile(loss='mean_squared_error', optimizer='adam')
|
||||
# evaluate model with stand
|
||||
model = tensorflow.keras.models.Sequential(
|
||||
model = Sequential(
|
||||
[
|
||||
tensorflow.keras.layers.Dense(
|
||||
128, activation="relu", input_dim=73,
|
||||
),
|
||||
tensorflow.keras.layers.Dense(128 // 2, activation="relu"),
|
||||
tensorflow.keras.layers.Dense(128 // 4, activation="relu"),
|
||||
tensorflow.keras.layers.Dense(128 // 8, activation="relu"),
|
||||
tensorflow.keras.layers.Dense(32, activation="relu"),
|
||||
tensorflow.keras.layers.Dense(1),
|
||||
Dense(512, activation="relu", input_dim=73),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 2, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 4, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 8, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(32, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(1),
|
||||
]
|
||||
)
|
||||
# model = tensorflow.keras.models.Sequential(
|
||||
# [
|
||||
# tensorflow.keras.layers.Dense(
|
||||
# 128, activation="relu", input_dim=73,
|
||||
# ),
|
||||
# tensorflow.keras.layers.Dense(128 // 2, activation="relu"),
|
||||
# tensorflow.keras.layers.Dense(128 // 4, activation="relu"),
|
||||
# tensorflow.keras.layers.Dense(128 // 8, activation="relu"),
|
||||
# tensorflow.keras.layers.Dense(32, activation="relu"),
|
||||
# tensorflow.keras.layers.Dense(1),
|
||||
# ]
|
||||
# )
|
||||
# model.add(Dense(16, input_dim=97, activation= "relu"))
|
||||
# model.add(Dense(32, activation= "relu"))
|
||||
# model.add(Dense(64, activation= "relu"))
|
||||
# model.add(Dense(32, activation= "relu"))
|
||||
# model.add(Dense(1))
|
||||
#model.summary() #Print model Summary
|
||||
# model.summary() #Print model Summary
|
||||
|
||||
model.compile(loss= "mean_squared_error" , optimizer="adam", metrics=["mean_squared_error"])
|
||||
f_train = np.asarray(f_train).astype(np.float32)
|
||||
model.compile(
|
||||
loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
|
||||
)
|
||||
f_train = np.asarray(x_train).astype(np.float32)
|
||||
y = np.asarray(y).astype(np.float32)
|
||||
model.fit(f_train, y, epochs=100)
|
||||
|
||||
@ -107,11 +127,11 @@ model.fit(f_train, y, epochs=100)
|
||||
# dev0_predicted = regr_trans.predict(dev_df)
|
||||
# test_A_predicted = regr_trans.predict(test_A_df)
|
||||
|
||||
# dev0_predicted = model.predict(dev_df)
|
||||
dev0_predicted = model.predict(dev_train)
|
||||
test_A_predicted = model.predict(test_A_df)
|
||||
|
||||
# dev0_predicted = np.round(dev0_predicted, decimals=1)
|
||||
test_A_predicted = np.round(test_A_predicted, decimals=1)
|
||||
# test_A_predicted = np.round(test_A_predicted, decimals=1)
|
||||
|
||||
# pd.DataFrame(dev0_predicted).to_csv("dev-0/out.tsv", header=None, index=None)
|
||||
pd.DataFrame(dev0_predicted).to_csv("dev-0/out.tsv", header=None, index=None)
|
||||
pd.DataFrame(test_A_predicted).to_csv("test-A/out.tsv", header=None, index=None)
|
||||
|
50
run2.py
Normal file
50
run2.py
Normal file
@ -0,0 +1,50 @@
|
||||
import pandas as pd
|
||||
|
||||
import tensorflow.keras
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.layers import Dense
|
||||
|
||||
in_columns = ["id_stacji", "nazwa_stacji", "typ_zbioru", "rok", "miesiąc"]
|
||||
df = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
|
||||
df_test = pd.read_csv("dev-0/in.tsv", names=in_columns, sep="\t")
|
||||
|
||||
df = pd.concat([df, df_test])
|
||||
df = df.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
|
||||
x = pd.get_dummies(df, columns=["id_stacji", "rok", "miesiąc"])
|
||||
|
||||
x = x.iloc[:-600]
|
||||
y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])
|
||||
|
||||
model = Sequential(
|
||||
[
|
||||
Dense(512, activation="relu", input_dim=73),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 2, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 4, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 8, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(32, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(1),
|
||||
]
|
||||
)
|
||||
|
||||
model.compile(
|
||||
loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
|
||||
)
|
||||
model.fit(x, y, epochs=100)
|
||||
|
||||
x_test = pd.read_csv("test-A/in.tsv", sep="\t", names=in_columns)
|
||||
df_train = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
|
||||
|
||||
x_test = pd.concat([x_test, df_train])
|
||||
x_test = x_test.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
|
||||
x_test = pd.get_dummies(x_test, columns=["id_stacji", "rok", "miesiąc"])
|
||||
|
||||
x_test = x_test.iloc[:-8760]
|
||||
pred = model.predict(x_test)
|
||||
|
||||
out = pd.DataFrame(pred)
|
||||
out.to_csv("test-A/out.tsv", sep="\t", header=False, index=False)
|
1440
test-A/out.tsv
1440
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user