"""Train a dense network on weather-station features and predict rainfall.

Pipeline (all of it runs at import/script time):

1. Read ``train/in.tsv`` and ``dev-0/in.tsv``, one-hot encode station id,
   year and month, and add latitude/longitude looked up by station name.
2. Fit a fully connected Keras regressor on the training rows against
   ``train/expected.tsv``.
3. Encode ``test-A/in.tsv`` into exactly the same feature columns and write
   the predictions to ``test-A/out.tsv``.
"""
import pandas as pd
import tensorflow.keras
from geopy.geocoders import Nominatim
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Nominatim client used to resolve station names to coordinates.
geolocator = Nominatim(user_agent="MyApp")

in_columns = ["id_stacji", "nazwa_stacji", "typ_zbioru", "rok", "miesiąc"]
_CATEGORICAL_COLUMNS = ["id_stacji", "rok", "miesiąc"]

# Coordinates keyed by station name. This one station is seeded by hand and
# is never taken from the geocoder. Both dicts double as a cache so every
# station name is sent to the geocoder at most once per run.
geo_lat = {"BIEBRZA-PIEŃCZYKÓWEK": 53.65}
geo_long = {"BIEBRZA-PIEŃCZYKÓWEK": 22.58}


def _geocode_missing(station_names):
    """Fill ``geo_lat``/``geo_long`` for every name that is not cached yet.

    Raises:
        ValueError: if the geocoder has no result for a station name.
    """
    for name in station_names:
        if name in geo_lat and name in geo_long:
            continue
        print(name)
        location = geolocator.geocode(name)
        if location is None:
            # geocode() returns None when nothing matches; fail with the
            # offending name instead of an AttributeError on `.latitude`.
            raise ValueError(f"No geocoding result for station {name!r}")
        geo_lat[name] = location.latitude
        geo_long[name] = location.longitude


def _encode_features(frame):
    """Return model features for ``frame``.

    The result holds the one-hot columns for station id, year and month
    followed by ``latitude`` and ``longitude``. Train and test both go
    through this function so their column order is produced the same way.
    """
    _geocode_missing(frame["nazwa_stacji"].unique())
    encoded = pd.get_dummies(frame, columns=_CATEGORICAL_COLUMNS)
    encoded["latitude"] = encoded["nazwa_stacji"].map(geo_lat)
    encoded["longitude"] = encoded["nazwa_stacji"].map(geo_long)
    return encoded.drop(["nazwa_stacji", "typ_zbioru"], axis=1)


# --- Training data ---------------------------------------------------------
df_train = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
df_test = pd.read_csv("dev-0/in.tsv", names=in_columns, sep="\t")
# The dev rows are appended only so their categories get one-hot columns too.
df = pd.concat([df_train, df_test])

x = _encode_features(df)
print(x)
print(geo_lat)
print(geo_long)

# Keep only the training rows (they come first in the concatenation). The
# cast gives Keras a homogeneous numeric array even when get_dummies yields
# bool columns.
x = x.iloc[: len(df_train)].astype("float32")
y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])

# --- Model -----------------------------------------------------------------
model = Sequential(
    [
        # Input width follows the encoded data instead of a fixed constant.
        Dense(512, activation="relu", input_dim=x.shape[1]),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 2, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 4, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(512 // 8, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(32, activation="relu"),
        tensorflow.keras.layers.BatchNormalization(),
        Dense(1),
    ]
)
model.compile(
    loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
)
model.fit(x, y, epochs=100)

# --- Prediction ------------------------------------------------------------
test_rows = pd.read_csv("test-A/in.tsv", sep="\t", names=in_columns)
# The training rows are appended so the categorical columns are parsed and
# encoded together with the ones the model was fitted on.
x_test = _encode_features(pd.concat([test_rows, df_train]))
x_test = x_test.iloc[: len(test_rows)]
# Align to the training layout: same columns, same order. Categories that
# were never seen in training are dropped; categories missing here become 0.
x_test = x_test.reindex(columns=x.columns, fill_value=0).astype("float32")

pred = model.predict(x_test)
out = pd.DataFrame(pred)
out.to_csv("test-A/out.tsv", sep="\t", header=False, index=False)