precipitation-pl/run2.py

110 lines
3.0 KiB
Python
Raw Normal View History

2022-05-22 12:33:21 +02:00
import pandas as pd
import tensorflow.keras
2022-05-22 12:53:20 +02:00
from sklearn.linear_model import LinearRegression
2022-05-22 12:33:21 +02:00
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
2022-05-22 18:32:43 +02:00
# Import the required library
from geopy.geocoders import Nominatim
# Create the Nominatim geocoding client; it is used further down to look up
# latitude/longitude for each station name.
# NOTE(review): "MyApp" is a placeholder user agent — consider an
# application-specific value, as the geopy docs ask for one.
geolocator = Nominatim(user_agent="MyApp")
2022-05-22 12:33:21 +02:00
# Column names applied to every in.tsv file when it is read.
in_columns = ["id_stacji", "nazwa_stacji", "typ_zbioru", "rok", "miesiąc"]

# Read the training and dev-0 inputs (in that order), then stack the dev-0
# rows underneath the training rows in a single frame.
df, df_test = (
    pd.read_csv(path, names=in_columns, sep="\t")
    for path in ("train/in.tsv", "dev-0/in.tsv")
)
df = pd.concat([df, df_test])
2022-05-22 18:32:43 +02:00
# df = df.drop(["nazwa_stacji"], axis=1)
# One-hot encode the categorical columns. "nazwa_stacji" is left untouched
# here because it is the key used for the coordinate lookup below.
x = pd.get_dummies(df, columns=["id_stacji", "rok", "miesiąc"])

# Coordinates entered by hand for the one station that is not geocoded.
geo_lat = {"BIEBRZA-PIEŃCZYKÓWEK": 53.65}
geo_long = {"BIEBRZA-PIEŃCZYKÓWEK": 22.58}

# Look up coordinates for every remaining station name.
for xd in x["nazwa_stacji"].unique():
    if xd in geo_lat:
        # Coordinates already known: skip the network round-trip.
        continue
    location = geolocator.geocode(xd)
    print(xd)
    if location is None:
        # geocode() returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on `.latitude`.
        raise ValueError(f"Could not geocode station {xd!r}")
    geo_lat[xd] = location.latitude
    geo_long[xd] = location.longitude
2022-05-22 12:33:21 +02:00
2022-05-22 18:32:43 +02:00
# Attach the coordinates found for each station name, then remove the two
# text columns so that only numeric features remain.
station_names = x["nazwa_stacji"]
x["latitude"] = station_names.map(geo_lat)
x["longitude"] = station_names.map(geo_long)
x = x.drop(columns=["nazwa_stacji", "typ_zbioru"])

# Debug output: the feature frame followed by both coordinate tables.
for shown in (x, geo_lat, geo_long):
    print(shown)
2022-05-22 12:33:21 +02:00
# Keep everything except the last 600 rows.
# NOTE(review): presumably 600 is the number of dev-0 rows that were appended
# to the training rows before one-hot encoding — confirm against dev-0/in.tsv.
x = x.iloc[:-600]
# Targets for training, read into a single column named "rainfall".
y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])
2022-05-22 12:53:20 +02:00
from sklearn.preprocessing import PolynomialFeatures
2022-05-22 13:21:03 +02:00
# xxx
2022-05-22 18:32:43 +02:00
# poly = PolynomialFeatures(2, interaction_only=True)
# df = poly.fit_transform(x)
2022-05-22 13:05:09 +02:00
# Feed-forward regressor: five ReLU layers of decreasing width
# (512, 256, 128, 64, 32), each followed by batch normalisation, and a final
# single-unit Dense layer with no activation.
model = Sequential(
    [
        # The input width follows the feature frame instead of a hard-coded
        # count, so a changed set of stations/years/months cannot silently
        # disagree with the model definition.
        Dense(512, activation="relu", input_dim=x.shape[1]),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 2, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 4, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 8, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(32, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(1),
    ]
)
# Mean squared error is both the training loss and the reported metric.
model.compile(
    loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
)
2022-05-22 18:32:43 +02:00
# Fit the network on the feature frame and the rainfall targets for 100
# epochs; no validation data is passed.
model.fit(x, y, epochs=100)
2022-05-22 12:33:21 +02:00
# Load the test-A inputs, and the training inputs once more: the two are
# concatenated below before the one-hot encoding is applied.
x_test = pd.read_csv("test-A/in.tsv", sep="\t", names=in_columns)
df_train = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
2022-05-22 18:32:43 +02:00
# Start the coordinate tables over with only the hand-entered station.
geo_lat = {"BIEBRZA-PIEŃCZYKÓWEK": 53.65}
geo_long = {"BIEBRZA-PIEŃCZYKÓWEK": 22.58}
2022-05-22 12:33:21 +02:00
# Test rows first, training rows after them; the trailing rows are sliced off
# again after the one-hot columns have been built.
x_test = pd.concat([x_test, df_train])
2022-05-22 18:32:43 +02:00
# Look up coordinates for every station name that has none yet.
for xd in x_test["nazwa_stacji"].unique():
    if xd in geo_lat:
        # Coordinates already known: skip the network round-trip.
        continue
    location = geolocator.geocode(xd)
    print(xd)
    if location is None:
        # geocode() returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on `.latitude`.
        raise ValueError(f"Could not geocode station {xd!r}")
    geo_lat[xd] = location.latitude
    geo_long[xd] = location.longitude

x_test["latitude"] = x_test["nazwa_stacji"].map(geo_lat)
x_test["longitude"] = x_test["nazwa_stacji"].map(geo_long)
x_test = x_test.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
x_test = pd.get_dummies(x_test, columns=["id_stacji", "rok", "miesiąc"])
# The model consumes features by position. The training frame `x` has the
# coordinates after the dummy columns, while this frame has them first (and
# may hold a different set of dummy columns), so align it to the training
# layout; dummy columns absent here are filled with 0.
x_test = x_test.reindex(columns=x.columns, fill_value=0)
2022-05-22 12:33:21 +02:00
# Keep everything except the last 8760 rows.
# NOTE(review): presumably 8760 is the number of training rows appended by the
# concat with df_train — confirm that train/in.tsv has exactly that many rows.
x_test = x_test.iloc[:-8760]
2022-05-22 18:32:43 +02:00
# poly = PolynomialFeatures(2, interaction_only=True)
# x_test2 = poly.fit_transform(x_test)
# Run the trained network on the prepared test features.
pred = model.predict(x_test)
2022-05-22 12:33:21 +02:00
# Write the predictions as a tab-separated file with no header row and no
# index column.
out = pd.DataFrame(pred)
out.to_csv("test-A/out.tsv", sep="\t", header=False, index=False)