import numpy as np
import pandas as pd
import xgboost as xg
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import (
    QuantileTransformer,
    StandardScaler,
    PolynomialFeatures,
)

import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Geocoding library (used only by the commented-out latitude/longitude experiment)
from geopy.geocoders import Nominatim

# Initialize the Nominatim API
geolocator = Nominatim(user_agent="MyApp")
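
# Each row of the in.tsv files carries (translated from Polish): station id,
# station name, dataset type, year and month. The matching target column in
# train/expected.tsv is named "rainfall" below, so given the year/month columns
# it appears to be a monthly rainfall total per station.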
in_columns = ["id_stacji", "nazwa_stacji", "typ_zbioru", "rok", "miesiąc"]

df = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")
df_test = pd.read_csv("dev-0/in.tsv", names=in_columns, sep="\t")

# Concatenate train and dev-0 so that get_dummies sees every category level
df = pd.concat([df, df_test])

# df = df.drop(["nazwa_stacji"], axis=1)
x = pd.get_dummies(df, columns=["id_stacji", "rok", "miesiąc"])

# Disabled experiment: geocode each station name to latitude/longitude features
# geo_lat = {
#     "BIEBRZA-PIEŃCZYKÓWEK": 53.65
# }
# geo_long = {
#     "BIEBRZA-PIEŃCZYKÓWEK": 22.58
# }
# for xd in x["nazwa_stacji"].unique():
#     location = geolocator.geocode(xd)
#     if xd == "BIEBRZA-PIEŃCZYKÓWEK":
#         pass
#     else:
#         print(xd)
#         geo_lat[xd] = location.latitude
#         geo_long[xd] = location.longitude
#
# x["latitude"] = x["nazwa_stacji"].map(geo_lat)
# x["longitude"] = x["nazwa_stacji"].map(geo_long)
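
# The remaining feature pipeline below drops the raw text columns and then
# expands the one-hot matrix with pairwise interaction terms via
# PolynomialFeatures(2, interaction_only=True).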
x = x.drop(["nazwa_stacji", "typ_zbioru"], axis=1)

poly = PolynomialFeatures(2, interaction_only=True)

# print(x)
# print(geo_lat)
# print(geo_long)

# Drop the dev-0 rows appended above (the last 600), keeping only the labelled training rows
x = x.iloc[:-600]
x = poly.fit_transform(x)

y = pd.read_csv("train/expected.tsv", sep="\t", names=["rainfall"])

# poly = PolynomialFeatures(2, interaction_only=True)
# df = poly.fit_transform(x)

param_grid = {
    "n_estimators": [100, 80, 60, 55, 51, 45],
    "max_depth": [7, 8],
    "reg_lambda": [0.26, 0.25, 0.2],
}
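
# Model: GridSearchCV tunes xg.XGBRFRegressor (XGBoost's random-forest mode)
# over param_grid. The search is wrapped in TransformedTargetRegressor with
# QuantileTransformer(output_distribution="normal"), which maps the target to a
# roughly normal distribution before fitting and inverts the mapping at predict
# time; presumably this is meant to tame the skewed rainfall values.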
grid = GridSearchCV(
    xg.XGBRFRegressor(), param_grid, refit=True, verbose=3, n_jobs=-1
)
regr_trans = TransformedTargetRegressor(
    regressor=grid, transformer=QuantileTransformer(output_distribution="normal")
)
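
# Note: after fitting, TransformedTargetRegressor exposes the fitted
# GridSearchCV instance as regr_trans.regressor_, which is where best_params_
# is read from below.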

# Fit the grid search (through the target transformer)
grid_result = regr_trans.fit(x, y)
best_params = grid_result.regressor_.best_params_

# Refit a standalone model with the best hyperparameters
best_model = xg.XGBRFRegressor(
    max_depth=best_params["max_depth"],
    n_estimators=best_params["n_estimators"],
    reg_lambda=best_params["reg_lambda"],
)
regr_trans = TransformedTargetRegressor(
    regressor=best_model, transformer=QuantileTransformer(output_distribution="normal")
)
regr_trans.fit(x, y)
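
# An alternative Keras feed-forward model (dense layers with batch
# normalization), left commented out below.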
# model = Sequential(
#     [
#         Dense(512, activation="relu", input_dim=75),
#         tensorflow.keras.layers.BatchNormalization(),
#         Dense(512 // 2, activation="relu"),
#         tensorflow.keras.layers.BatchNormalization(),
#         Dense(512 // 4, activation="relu"),
#         tensorflow.keras.layers.BatchNormalization(),
#         Dense(512 // 8, activation="relu"),
#         tensorflow.keras.layers.BatchNormalization(),
#         Dense(32, activation="relu"),
#         tensorflow.keras.layers.BatchNormalization(),
#         Dense(1),
#     ]
# )
#
# model.compile(
#     loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]
# )
# model.fit(x, y, epochs=100)
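
# Test-time preprocessing mirrors the training pipeline: the training rows are
# appended so get_dummies yields the same dummy columns, the text columns are
# dropped, the same interaction expansion is applied, and the appended training
# rows are trimmed off again (the last 8760 rows) before predicting. This
# implicitly relies on train, dev-0 and test-A sharing the same station ids,
# years and months; otherwise the dummy columns would not line up.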
x_test = pd.read_csv("test-A/in.tsv", sep="\t", names=in_columns)
df_train = pd.read_csv("train/in.tsv", names=in_columns, sep="\t")

# geo_lat = {
#     "BIEBRZA-PIEŃCZYKÓWEK": 53.65
# }
# geo_long = {
#     "BIEBRZA-PIEŃCZYKÓWEK": 22.58
# }

x_test = pd.concat([x_test, df_train])

# for xd in x_test["nazwa_stacji"].unique():
#     location = geolocator.geocode(xd)
#     if xd == "BIEBRZA-PIEŃCZYKÓWEK":
#         pass
#     else:
#         print(xd)
#         geo_lat[xd] = location.latitude
#         geo_long[xd] = location.longitude
#
# x_test["latitude"] = x_test["nazwa_stacji"].map(geo_lat)
# x_test["longitude"] = x_test["nazwa_stacji"].map(geo_long)

x_test = x_test.drop(["nazwa_stacji", "typ_zbioru"], axis=1)
x_test = pd.get_dummies(x_test, columns=["id_stacji", "rok", "miesiąc"])

poly = PolynomialFeatures(2, interaction_only=True)

# Trim off the appended training rows, keeping only the test-A rows
x_test = x_test.iloc[:-8760]
x_test = poly.fit_transform(x_test)

# poly = PolynomialFeatures(2, interaction_only=True)
# x_test2 = poly.fit_transform(x_test)

# pred = model.predict(x_test)
test_A_predicted = regr_trans.predict(x_test)

# Write the predictions as a single-column TSV without header or index
out = pd.DataFrame(test_A_predicted)
out.to_csv("test-A/out.tsv", sep="\t", header=False, index=False)