"""Per-station monthly-mean baseline for the precipitation data set.

Reads the training rows (``train/in.tsv`` + ``train/expected.tsv``), arranges
the targets into a ``(station, year, month)`` grid, averages each station's
calendar months over the first ``known_years`` years, and writes one
prediction per row of ``test-A/in.tsv`` to ``test-A/out.tsv``.
"""
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Column names are Polish: station id, station name, data-set type, year, month.
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']

df = pd.read_csv('train/in.tsv', header=None, sep='\t')
df.columns = in_columns
measurements = pd.read_csv('train/expected.tsv', header=None, sep='\t')
measurements.columns = ['suma_opadów']

# Features and targets are paired by position, so a length mismatch would
# silently misalign (or drop) rows.
if len(df) != len(measurements):
    raise ValueError(
        f'train/in.tsv has {len(df)} rows but train/expected.tsv has '
        f'{len(measurements)}'
    )

start_year = 1981
end_year = 2021  # exclusive upper bound
total_years = end_year - start_year
total_months = total_years * 12
known_years = 30  # number of leading years used to compute the statistics

stations = [
    249180010, 249190560, 249200370, 249200490, 249220150, 249220180,
    250190160, 250190390, 250210130, 251170090, 251210040, 252150120,
    252160230, 252200150, 252210050, 252230120, 253170210, 253220070,
    253230020, 254200080, 254220090,
]
station_to_idx = {station: i for i, station in enumerate(stations)}


def _station_indices(station_ids):
    """Map a Series of station ids to row indices of the grid.

    Raises:
        KeyError: if any id is not listed in ``stations``.
    """
    indices = station_ids.map(station_to_idx)
    unknown = indices.isna()
    if unknown.any():
        raise KeyError(
            f'unknown station ids: {station_ids[unknown].unique().tolist()}'
        )
    return indices.to_numpy(dtype=np.intp)


def _zero_based_months(months):
    """Convert a Series of 1-based months to validated 0-based indices.

    Raises:
        ValueError: if any month falls outside 1..12 (NaN included).
    """
    zero_based = months.to_numpy() - 1
    # Written as a positive check so NaN (which fails every comparison)
    # is rejected as well.
    if not ((zero_based >= 0) & (zero_based < 12)).all():
        raise ValueError('month values must lie in 1..12')
    return zero_based.astype(np.intp)


years = df['rok'].to_numpy()
# Explicit check instead of ``assert``: with ``-O`` an out-of-range year would
# otherwise turn into a negative index and wrap around to the wrong cell.
if not ((years >= start_year) & (years < end_year)).all():
    raise ValueError(f'years must lie in [{start_year}, {end_year})')

station_rows = _station_indices(df['id_stacji'])
absolute_months = (
    (years.astype(np.intp) - start_year) * 12 + _zero_based_months(df['miesiąc'])
)

# Float grid with NaN for "no measurement": an integer grid would truncate the
# measurements, and a numeric sentinel such as -1 would leak into the means.
x = np.full((len(stations), total_months), np.nan)
x[station_rows, absolute_months] = measurements['suma_opadów'].to_numpy(dtype=float)

z = x.reshape((len(stations), total_years, 12))
fully_known: np.ndarray = z[:, :known_years]

# NaN-aware statistics so that gaps inside the window are ignored rather than
# averaged in.
all_time_std = np.nanstd(fully_known, axis=(1, 2))
all_time_mean = np.nanmean(fully_known, axis=(1, 2))
std_per_month = np.nanstd(fully_known, axis=1)
mean_per_month = np.nanmean(fully_known, axis=1)

# If a station has no data at all for some calendar month, fall back to that
# station's overall mean instead of emitting NaN.
predictions_grid = np.where(
    np.isnan(mean_per_month), all_time_mean[:, np.newaxis], mean_per_month
)

df = pd.read_csv('test-A/in.tsv', header=None, sep='\t')
df.columns = in_columns

# plt.plot(fully_known.T)

predictions = predictions_grid[
    _station_indices(df['id_stacji']), _zero_based_months(df['miesiąc'])
]

with open('test-A/out.tsv', 'w') as f:
    # One prediction per input row, in input order.
    for value in predictions:
        print(value, file=f)