precipitation-pl/run.py
Aleksander Mendoza 36bef951e1 s434749
2022-05-15 16:14:24 +02:00

74 lines
2.0 KiB
Python

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# Column labels for the header-less in.tsv files.
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']

# Training inputs; the file carries no header row, so labels are attached here.
df = pd.read_csv('train/in.tsv', sep='\t', header=None).set_axis(in_columns, axis=1)

# Training targets: a single column, paired row by row with `df` further down.
measurements = pd.read_csv('train/expected.tsv', sep='\t', header=None).set_axis(
    ['suma_opadów'], axis=1
)
# Years accepted by the loader form the half-open range [start_year, end_year).
start_year, end_year = 1981, 2021
total_years = end_year - start_year
# Width of the per-station time axis, one slot per calendar month.
total_months = 12 * total_years
# Number of leading years that the statistics are computed over.
known_years = 30
# Station identifiers (matched against the 'id_stacji' column); the position
# in this list is the station's row index in every array built below.
stations = [
    249180010, 249190560, 249200370, 249200490, 249220150, 249220180,
    250190160, 250190390, 250210130,
    251170090, 251210040,
    252150120, 252160230, 252200150, 252210050, 252230120,
    253170210, 253220070, 253230020,
    254200080, 254220090,
]
# Reverse lookup: station identifier -> row index.
station_to_idx = dict(zip(stations, range(len(stations))))
# Dense (station, absolute month) grid of the training measurements.
# The fill value must be a float: np.full infers its dtype from the fill
# value, and an integer grid would silently truncate any fractional
# measurement on assignment. -1.0 marks months with no observation.
x = np.full((len(stations), total_months), fill_value=-1.0)

# The two frames are paired row by row; zip() would silently drop the tail
# of the longer one, so make a length mismatch loud instead.
assert len(df) == len(measurements), (len(df), len(measurements))

for (_, df_row), (_, measurement) in zip(df.iterrows(), measurements.iterrows()):
    station_id = df_row['id_stacji']
    station_idx = station_to_idx[station_id]
    year = df_row['rok']
    month = df_row['miesiąc'] - 1  # months are 1-based in the file
    assert start_year <= year < end_year, year
    assert 0 <= month < 12
    # Months counted from January of start_year.
    absolute_month = (year - start_year) * 12 + month
    # Store the scalar value rather than the one-element row Series, so the
    # assignment does not depend on implicit Series-to-scalar conversion.
    x[station_idx, absolute_month] = measurement['suma_opadów']
# Split the flat month axis into (station, year, month-of-year).
z = x.reshape(len(stations), total_years, 12)

# Statistics use only the first `known_years` years.
# NOTE(review): any -1 placeholder left inside this window would be averaged
# in as if it were a real measurement -- confirm the window has no gaps.
fully_known: np.ndarray = z[:, :known_years]

# One value per station, pooled over every year and month in the window.
all_time_mean = np.mean(fully_known, axis=(1, 2))
all_time_std = np.std(fully_known, axis=(1, 2))

# One value per (station, month-of-year), pooled over the years in the window.
mean_per_month = np.mean(fully_known, axis=1)
std_per_month = np.std(fully_known, axis=1)
# Test inputs share the column layout of the training inputs.
df = pd.read_csv('test-A/in.tsv', header=None, sep='\t')
df.columns = in_columns

# One prediction per input row: the station's mean for that month of the year.
# The year column is not used.
with open('test-A/out.tsv', 'w+') as f:
    for station_id, month_number in zip(df['id_stacji'], df['miesiąc']):
        station_idx = station_to_idx[station_id]
        month = month_number - 1  # months are 1-based in the file
        assert 0 <= month < 12
        print(mean_per_month[station_idx, month], file=f)