import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Column labels for the header-less feature files (train and test share them).
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']

# Training features: one row per (station, year, month) observation.
df = pd.read_csv(
    'train/in.tsv', sep='\t', header=None
).set_axis(in_columns, axis='columns')

# Training targets: a single column, row-aligned with the feature file.
measurements = pd.read_csv(
    'train/expected.tsv', sep='\t', header=None
).set_axis(['suma_opadów'], axis='columns')

# Time span covered by the measurement grid: years in [start_year, end_year).
start_year = 1981
end_year = 2021
total_years = end_year - start_year
total_months = total_years * 12
# Number of leading years used to compute the per-station statistics.
known_years = 30

# Station ids that get a row in the grid; the row index is the list position.
stations = [
    249180010,
    249190560,
    249200370,
    249200490,
    249220150,
    249220180,
    250190160,
    250190390,
    250210130,
    251170090,
    251210040,
    252150120,
    252160230,
    252200150,
    252210050,
    252230120,
    253170210,
    253220070,
    253230020,
    254200080,
    254220090,
]
station_to_idx = {station: i for i, station in enumerate(stations)}

# One cell per (station, month since start_year); -1 marks "no measurement".
# The dtype is forced to float: np.full would otherwise infer an integer
# array from the integer fill value, and fractional measurements written
# into it later would be silently truncated.
x = np.full((len(stations), total_months), fill_value=-1.0, dtype=float)

# Copy every training measurement into its (station, month-since-start) cell.
# The feature and target files are paired row by row, so a length mismatch
# means the pairing is broken; fail loudly instead of letting zip truncate.
if len(df) != len(measurements):
    raise ValueError(
        f'feature/target row count mismatch: '
        f'{len(df)} != {len(measurements)}'
    )

# Iterating the needed columns yields plain scalars, so the value stored in
# the grid is a number rather than a one-element row Series.
for station_id, year, month_number, rainfall in zip(
    df['id_stacji'], df['rok'], df['miesiąc'], measurements['suma_opadów']
):
    # Unknown station ids raise KeyError, exactly as before.
    station_idx = station_to_idx[station_id]
    month = month_number - 1  # files use 1..12, the grid uses 0..11
    # Explicit raises (not asserts) so the checks survive `python -O`; an
    # out-of-range value would otherwise land in the wrong grid cell.
    if not start_year <= year < end_year:
        raise ValueError(f'year outside the grid range: {year}')
    if not 0 <= month < 12:
        raise ValueError(f'month outside 1..12: {month_number}')
    absolute_month = (year - start_year) * 12 + month
    x[station_idx, absolute_month] = rainfall

# View the flat month axis as (station, year, month-of-year).
z = x.reshape((len(stations), total_years, 12))

# Statistics are taken over the first `known_years` years only.
# x marks cells without a measurement with -1; those markers must not be
# averaged in as if they were real values, so they are masked to NaN and the
# NaN-aware reductions are used. This assumes real measurements are never
# negative (they are precipitation sums). When the window has no gaps the
# results are identical to plain mean/std.
known_window = z[:, :known_years]
fully_known: np.ndarray = np.where(known_window < 0, np.nan, known_window)

# Per-station statistics over the whole window -> shape (station,).
all_time_std = np.nanstd(fully_known, axis=(1, 2))
all_time_mean = np.nanmean(fully_known, axis=(1, 2))
# Per-station, per-calendar-month statistics -> shape (station, 12).
# A (station, month) pair with no measurement at all yields NaN (NumPy emits
# a RuntimeWarning for the empty slice).
std_per_month = np.nanstd(fully_known, axis=1)
mean_per_month = np.nanmean(fully_known, axis=1)

# Test features share the training column layout.
df = pd.read_csv(
    'test-A/in.tsv', sep='\t', header=None
).set_axis(in_columns, axis='columns')

# Predict, for every test row, the station's mean for that calendar month and
# write one value per line.
with open('test-A/out.tsv', 'w+') as f:
    for station_id, month_number in zip(df['id_stacji'], df['miesiąc']):
        # Station lookup comes first so an unknown id still raises KeyError
        # before the month check runs.
        station_idx = station_to_idx[station_id]
        month = month_number - 1  # files use 1..12, the table uses 0..11
        assert 0 <= month < 12
        print(mean_per_month[station_idx, month], file=f)