final attempt
This commit is contained in:
parent
292f341428
commit
a2d32d05e2
3
run.py
3
run.py
@ -19,9 +19,6 @@ dev_expected = pd.read_csv("dev-0/expected.tsv", header=None, sep="\t")
|
||||
dev_0 = pd.read_csv("dev-0/in.tsv", header=None, sep="\t")
|
||||
test_A = pd.read_csv("test-A/in.tsv", header=None, sep="\t")
|
||||
|
||||
poly = PolynomialFeatures(2, interaction_only=True)
|
||||
|
||||
|
||||
def preprocess_data(df_to_process, main_df=None):
|
||||
final_df = pd.get_dummies(df_to_process, columns=[0, 3, 4])
|
||||
final_df.drop(columns=[1, 2], inplace=True)
|
||||
|
167
run2.py
167
run2.py
@ -5,6 +5,20 @@ from sklearn.linear_model import LinearRegression
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.layers import Dense
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xgboost as xg
|
||||
from sklearn.compose import TransformedTargetRegressor
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.preprocessing import (
|
||||
QuantileTransformer,
|
||||
StandardScaler,
|
||||
PolynomialFeatures,
|
||||
)
|
||||
|
||||
import tensorflow.keras
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.layers import Dense
|
||||
# Import the required library
|
||||
from geopy.geocoders import Nominatim
|
||||
|
||||
@ -20,30 +34,32 @@ df_test = pd.read_csv("dev-0/in.tsv", names=in_columns, sep="\t")
|
||||
df = pd.concat([df, df_test])
|
||||
# df = df.drop(["nazwa_stacji"], axis=1)
|
||||
x = pd.get_dummies(df, columns=["id_stacji", "rok", "miesiąc"])
|
||||
geo_lat = {
|
||||
"BIEBRZA-PIEŃCZYKÓWEK" : 53.65
|
||||
}
|
||||
geo_long = {
|
||||
"BIEBRZA-PIEŃCZYKÓWEK": 22.58
|
||||
}
|
||||
for xd in x["nazwa_stacji"].unique():
|
||||
location = geolocator.geocode(xd)
|
||||
if xd == "BIEBRZA-PIEŃCZYKÓWEK":
|
||||
pass
|
||||
else:
|
||||
print(xd)
|
||||
geo_lat[xd] = location.latitude
|
||||
geo_long[xd] = location.longitude
|
||||
|
||||
|
||||
x["latitude"] = x["nazwa_stacji"].map(geo_lat)
|
||||
x["longitude"] = x["nazwa_stacji"].map(geo_long)
|
||||
# geo_lat = {
|
||||
# "BIEBRZA-PIEŃCZYKÓWEK" : 53.65
|
||||
# }
|
||||
# geo_long = {
|
||||
# "BIEBRZA-PIEŃCZYKÓWEK": 22.58
|
||||
# }
|
||||
# for xd in x["nazwa_stacji"].unique():
|
||||
# location = geolocator.geocode(xd)
|
||||
# if xd == "BIEBRZA-PIEŃCZYKÓWEK":
|
||||
# pass
|
||||
# else:
|
||||
# print(xd)
|
||||
# geo_lat[xd] = location.latitude
|
||||
# geo_long[xd] = location.longitude
|
||||
#
|
||||
#
|
||||
# x["latitude"] = x["nazwa_stacji"].map(geo_lat)
|
||||
# x["longitude"] = x["nazwa_stacji"].map(geo_long)
|
||||
x = x.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
|
||||
|
||||
print(x)
|
||||
print(geo_lat)
|
||||
print(geo_long)
|
||||
poly = PolynomialFeatures(2, interaction_only=True)
|
||||
#
|
||||
# print(x)
|
||||
# print(geo_lat)
|
||||
# print(geo_long)
|
||||
x = x.iloc[:-600]
|
||||
x = poly.fit_transform(x)
|
||||
y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])
|
||||
|
||||
|
||||
@ -51,59 +67,90 @@ from sklearn.preprocessing import PolynomialFeatures
|
||||
# xxx
|
||||
# poly = PolynomialFeatures(2, interaction_only=True)
|
||||
# df = poly.fit_transform(x)
|
||||
param_grid = {
|
||||
"n_estimators": [100, 80, 60, 55, 51, 45],
|
||||
"max_depth": [7, 8],
|
||||
"reg_lambda": [0.26, 0.25, 0.2],
|
||||
}
|
||||
|
||||
model = Sequential(
|
||||
[
|
||||
Dense(512, activation="relu", input_dim=75),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 2, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 4, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 8, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(32, activation="relu"),
|
||||
tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(1),
|
||||
]
|
||||
grid = GridSearchCV(
|
||||
xg.XGBRFRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1
|
||||
) #
|
||||
regr_trans = TransformedTargetRegressor(
|
||||
regressor=grid, transformer=QuantileTransformer(output_distribution="normal")
|
||||
)
|
||||
|
||||
model.compile(
|
||||
loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
|
||||
# fitting the model for grid search
|
||||
grid_result = regr_trans.fit(x, y)
|
||||
best_params = grid_result.regressor_.best_params_
|
||||
|
||||
# using best params to create and fit model
|
||||
best_model = xg.XGBRFRegressor(
|
||||
max_depth=best_params["max_depth"],
|
||||
n_estimators=best_params["n_estimators"],
|
||||
reg_lambda=best_params["reg_lambda"],
|
||||
)
|
||||
model.fit(x, y, epochs=100)
|
||||
regr_trans = TransformedTargetRegressor(
|
||||
regressor=best_model, transformer=QuantileTransformer(output_distribution="normal")
|
||||
)
|
||||
|
||||
regr_trans.fit(x, y)
|
||||
|
||||
# model = Sequential(
|
||||
# [
|
||||
# Dense(512, activation="relu", input_dim=75),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
# Dense(512 // 2, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
# Dense(512 // 4, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
# Dense(512 // 8, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
# Dense(32, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
# Dense(1),
|
||||
# ]
|
||||
# )
|
||||
#
|
||||
# model.compile(
|
||||
# loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
|
||||
# )
|
||||
# model.fit(x, y, epochs=100)
|
||||
|
||||
x_test = pd.read_csv("test-A/in.tsv", sep="\t", names=in_columns)
|
||||
df_train = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
|
||||
|
||||
geo_lat = {
|
||||
"BIEBRZA-PIEŃCZYKÓWEK" : 53.65
|
||||
}
|
||||
geo_long = {
|
||||
"BIEBRZA-PIEŃCZYKÓWEK": 22.58
|
||||
}
|
||||
# geo_lat = {
|
||||
# "BIEBRZA-PIEŃCZYKÓWEK" : 53.65
|
||||
# }
|
||||
# geo_long = {
|
||||
# "BIEBRZA-PIEŃCZYKÓWEK": 22.58
|
||||
# }
|
||||
x_test = pd.concat([x_test, df_train])
|
||||
|
||||
for xd in x_test["nazwa_stacji"].unique():
|
||||
location = geolocator.geocode(xd)
|
||||
if xd == "BIEBRZA-PIEŃCZYKÓWEK":
|
||||
pass
|
||||
else:
|
||||
print(xd)
|
||||
geo_lat[xd] = location.latitude
|
||||
geo_long[xd] = location.longitude
|
||||
|
||||
|
||||
x_test["latitude"] = x_test["nazwa_stacji"].map(geo_lat)
|
||||
x_test["longitude"] = x_test["nazwa_stacji"].map(geo_long)
|
||||
# for xd in x_test["nazwa_stacji"].unique():
|
||||
# location = geolocator.geocode(xd)
|
||||
# if xd == "BIEBRZA-PIEŃCZYKÓWEK":
|
||||
# pass
|
||||
# else:
|
||||
# print(xd)
|
||||
# geo_lat[xd] = location.latitude
|
||||
# geo_long[xd] = location.longitude
|
||||
#
|
||||
#
|
||||
# x_test["latitude"] = x_test["nazwa_stacji"].map(geo_lat)
|
||||
# x_test["longitude"] = x_test["nazwa_stacji"].map(geo_long)
|
||||
|
||||
x_test = x_test.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
|
||||
x_test = pd.get_dummies(x_test, columns=["id_stacji", "rok", "miesiąc"])
|
||||
|
||||
poly = PolynomialFeatures(2, interaction_only=True)
|
||||
x_test = x_test.iloc[:-8760]
|
||||
x_test = poly.fit_transform(x_test)
|
||||
# poly = PolynomialFeatures(2, interaction_only=True)
|
||||
# x_test2 = poly.fit_transform(x_test)
|
||||
pred = model.predict(x_test)
|
||||
# pred = model.predict(x_test)
|
||||
test_A_predicted = regr_trans.predict(x_test)
|
||||
|
||||
out = pd.DataFrame(pred)
|
||||
|
||||
out = pd.DataFrame(test_A_predicted)
|
||||
out.to_csv("test-A/out.tsv", sep="\t", header=False, index=False)
|
||||
|
36
run3.py
36
run3.py
@ -7,8 +7,10 @@ import pandas as pd
|
||||
import xgboost as xg
|
||||
|
||||
import tensorflow.keras
|
||||
from keras.layers import Dropout
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.layers import Dense
|
||||
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
|
||||
|
||||
in_columns = ["id_stacji", "nazwa_stacji", "typ_zbioru", "rok", "miesiąc"]
|
||||
df = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
|
||||
@ -20,28 +22,44 @@ x = x.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
|
||||
x = x.iloc[:-600]
|
||||
y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])
|
||||
|
||||
|
||||
model = Sequential(
|
||||
[
|
||||
Dense(1024, activation="relu", input_dim=73),
|
||||
Dense(512, activation="relu"),
|
||||
Dense(2048, activation="relu", input_dim=73),
|
||||
Dense(1024, activation="relu"),
|
||||
Dense(224, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 2, activation="relu"),
|
||||
Dense(320, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 4, activation="relu"),
|
||||
Dense(384, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(512 // 8, activation="relu"),
|
||||
Dense(416, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(32, activation="relu"),
|
||||
Dense(448, activation="relu"),
|
||||
Dense(448, activation="relu"),
|
||||
Dense(256, activation="relu"),
|
||||
# tensorflow.keras.layers.BatchNormalization(),
|
||||
Dense(1, activation="linear"),
|
||||
]
|
||||
)
|
||||
|
||||
# input = tensorflow.keras.layers.Input(shape=x.shape[1:])
|
||||
# hidden1 = tensorflow.keras.layers.Dense(1024, activation='relu')(input)
|
||||
# hidden2 = tensorflow.keras.layers.Dense(512, activation='relu')(hidden1)
|
||||
# hidden3 = tensorflow.keras.layers.Dense(256, activation='relu')(hidden2)
|
||||
# hidden4 = tensorflow.keras.layers.Dense(128, activation='relu')(hidden3)
|
||||
# concat = tensorflow.keras.layers.Concatenate()([input, hidden4])
|
||||
# output = tensorflow.keras.layers.Dense(1, activation="linear")(concat)
|
||||
# model = tensorflow.keras.models.Model(inputs=[input], outputs=[output])
|
||||
|
||||
model.compile(
|
||||
loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
|
||||
)
|
||||
model.fit(x, y, epochs=100)
|
||||
|
||||
# estimator = KerasRegressor(build_fn=model, epochs=100, batch_size=10, verbose=0)
|
||||
# estimator.fit(x, y)
|
||||
model.fit(x, y, epochs=100)
|
||||
# exit()
|
||||
x_test = pd.read_csv("test-A/in.tsv", sep="\t", names=in_columns)
|
||||
df_train = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
|
||||
|
||||
@ -50,7 +68,7 @@ x_test = pd.concat([x_test, df_train])
|
||||
x_test = x_test.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
|
||||
x_test = pd.get_dummies(x_test, columns=["id_stacji", "rok", "miesiąc"])
|
||||
x_test = x_test.iloc[:-8760]
|
||||
|
||||
pred = model.predict(x_test)
|
||||
# pred = estimator.predict(x_test)
|
||||
out = pd.DataFrame(pred)
|
||||
out.to_csv("test-A/out.tsv", sep="\t", header=False, index=False)
|
||||
out.to_csv("test-A/out2.tsv", sep="\t", header=False, index=False)
|
||||
|
1440
test-A/out.tsv
1440
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user