s434749
This commit is contained in:
parent
36bef951e1
commit
858c7511df
63
run.py
63
run.py
@ -1,6 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
|
||||||
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']
|
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']
|
||||||
|
|
||||||
@ -52,22 +53,76 @@ for (_, df_row), (_, measurement) in zip(df.iterrows(), measurements.iterrows())
|
|||||||
absolute_month = (year - start_year) * 12 + month
|
absolute_month = (year - start_year) * 12 + month
|
||||||
x[station_idx, absolute_month] = measurement
|
x[station_idx, absolute_month] = measurement
|
||||||
|
|
||||||
|
# Load the dev-0 split: one feature row per line of in.tsv, with the matching
# rainfall total on the same line of expected.tsv.
test_in = pd.read_csv('dev-0/in.tsv', header=None, sep='\t')
test_in.columns = in_columns

test_exp = pd.read_csv('dev-0/expected.tsv', header=None, sep='\t')
test_exp.columns = ['suma_opadów']

# zip() stops silently at the shorter input, so make a length mismatch loud
# instead of dropping trailing rows.
assert len(test_in) == len(test_exp), (len(test_in), len(test_exp))

# Write every dev-0 measurement into its (station, absolute month) cell of x.
for (_, df_row), rainfall in zip(test_in.iterrows(), test_exp['suma_opadów']):
    station_id = df_row['id_stacji']
    station_idx = station_to_idx[station_id]
    year = df_row['rok']
    month = df_row['miesiąc'] - 1  # months in the file are 1-based
    assert start_year <= year < end_year, year
    assert 0 <= month < 12
    absolute_month = (year - start_year) * 12 + month
    # The target cell must still hold -1 (apparently the "no data yet"
    # marker), i.e. dev-0 must not overwrite an existing measurement.
    assert x[station_idx, absolute_month] == -1
    # Store the scalar value, not a one-element row Series: assigning an
    # ndim > 0 object to a single array cell is deprecated in recent NumPy.
    x[station_idx, absolute_month] = rainfall
|
||||||
|
|
||||||
z = x.reshape((len(stations), total_years, 12))
|
z = x.reshape((len(stations), total_years, 12))
|
||||||
fully_known: np.ndarray = z[:, :known_years]
|
fully_known: np.ndarray = z[:, :known_years]
|
||||||
|
assert (fully_known == -1).sum() == 0
|
||||||
all_time_std = fully_known.std((1, 2))
|
all_time_std = fully_known.std((1, 2))
|
||||||
all_time_mean = fully_known.mean((1, 2))
|
all_time_mean = fully_known.mean((1, 2))
|
||||||
std_per_month = fully_known.std(1)
|
std_per_month = fully_known.std(1)
|
||||||
mean_per_month = fully_known.mean(1)
|
mean_per_month = fully_known.mean(1)
|
||||||
|
|
||||||
|
# Row indices of x (stations) that still contain at least one -1 cell.
missing_stations = np.unique(np.nonzero(x == -1)[0])

# Each such station is expected to lack exactly the months that follow the
# first `known_years` years, and nothing else.
missing_entries = len(missing_stations) * (total_years - known_years) * 12
assert np.count_nonzero(z[missing_stations, known_years:] == -1) == missing_entries
assert np.count_nonzero(x == -1) == missing_entries

# plt.plot(fully_known.reshape(len(stations),-1).T)
# plt.show()

# Complement of missing_stations: stations without any -1 cell.
all_stations = np.arange(len(stations))
known_stations = np.delete(all_stations, missing_stations)

entries_of_fully_known_stations = z[known_stations]
assert np.count_nonzero(entries_of_fully_known_stations == -1) == 0

# The first `known_years` years of the stations that have gaps later on.
known_entries_of_partially_known_stations = z[missing_stations, :known_years]
|
||||||
|
|
||||||
|
# One linear model per calendar month. Each model maps the values of the
# fully observed stations in a given year to the values of the stations that
# have gaps, and is fitted on the first `known_years` years only.
model_per_month = [LinearRegression() for _ in range(12)]
for month, model in enumerate(model_per_month):
    # Features: one row per known year, one column per fully observed station.
    u = entries_of_fully_known_stations[:, :known_years, month].T
    # Targets: one row per known year, one column per station with gaps.
    v = known_entries_of_partially_known_stations[:, :, month].T
    model.fit(u, v)

    # In-sample diagnostics: the model is scored on the data it was fitted on.
    p = model.predict(u)
    # Take the square root so the printed value really is an RMSE; the
    # previous expression stopped at the mean squared error.
    rmse = np.sqrt(np.mean((p - v) ** 2))
    # Baseline: predict each station's per-month mean for every year.
    m = mean_per_month[missing_stations, month]
    rmse2 = np.sqrt(np.mean((m - v) ** 2))
    print(rmse, "/", rmse2)
|
||||||
|
|
||||||
|
# Keep a pre-imputation snapshot of z; the -1 cells in it record which entries
# were originally missing.
z_prev = z.copy()

for month, model in enumerate(model_per_month):
    # Features for the years after `known_years`: one row per year, one
    # column per fully observed station.
    u = entries_of_fully_known_stations[:, known_years:, month].T
    raw = model.predict(u)
    # Replace negative predictions with 0; everything else passes through.
    p = np.where(raw < 0, 0, raw)
    # Only cells that are still unfilled may be written.
    assert (z[missing_stations, known_years:, month] == -1).all()
    # p is (year, station); the slice of z is (station, year).
    z[missing_stations, known_years:, month] = p.T

# After all twelve months are filled in, no -1 marker may remain.
assert not np.any(z == -1)
|
||||||
df = pd.read_csv('test-A/in.tsv', header=None, sep='\t')
|
df = pd.read_csv('test-A/in.tsv', header=None, sep='\t')
|
||||||
df.columns = in_columns
|
df.columns = in_columns
|
||||||
|
|
||||||
# plt.plot(fully_known.T)
|
|
||||||
with open('test-A/out.tsv', 'w+') as f:
|
with open('test-A/out.tsv', 'w+') as f:
|
||||||
for _, df_row in df.iterrows():
|
for _, df_row in df.iterrows():
|
||||||
station_id = df_row['id_stacji']
|
station_id = df_row['id_stacji']
|
||||||
station_idx = station_to_idx[station_id]
|
station_idx = station_to_idx[station_id]
|
||||||
# year = df_row['rok']
|
year = df_row['rok']
|
||||||
month = df_row['miesiąc'] - 1
|
month = df_row['miesiąc'] - 1
|
||||||
|
assert start_year <= year < end_year, year
|
||||||
assert 0 <= month < 12
|
assert 0 <= month < 12
|
||||||
print(mean_per_month[station_idx, month], file=f)
|
year = year - start_year
|
||||||
|
assert z_prev[station_idx, year, month] == -1
|
||||||
|
assert z[station_idx, year, month] != -1
|
||||||
|
print(z[station_idx, year, month], file=f)
|
||||||
|
1440
test-A/out.tsv
1440
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user