precipitation-pl/solution.ipynb
2022-05-21 19:36:08 +02:00

81 KiB
Raw Blame History

# Import required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense
# Names for the columns of the input TSV files; they are passed to read_csv
# explicitly, so pandas treats every row of the file as data.
in_columns = [
    'id_stacji',
    'nazwa_stacji',
    'typ_zbioru',
    'rok',
    'miesiąc',
]

# Training features.
df = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
len(df)
8760
# Test features, parsed with the same column names as the training file.
df_test = pd.read_csv('test-A/in.tsv', sep='\t', names=in_columns)
len(df_test)
720
# Append the test rows after the training rows so both are one-hot encoded
# together; each frame keeps its own row labels.
df = pd.concat(objs=[df, df_test], axis=0)
len(df)
9480
# Keep only the columns that get one-hot encoded.
df = df.drop(columns=['nazwa_stacji', 'typ_zbioru'])

# One indicator column per distinct value of id_stacji, rok and miesiąc.
x = pd.get_dummies(data=df, columns=['id_stacji', 'rok', 'miesiąc'])
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# The test rows were appended after the training rows before encoding, so
# dropping the last len(df_test) rows leaves exactly the training part.
# The count is derived from df_test rather than hard-coded, and the slice end
# is written as an explicit position so an empty test set keeps every row
# (x.iloc[:-0] would select nothing).
x = x.iloc[:len(x) - len(df_test)]
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

8760 rows × 73 columns

# Target values for the training rows, read into a single 'rainfall' column.
y = pd.read_csv('train/expected.tsv', names=['rainfall'], sep='\t')
y
rainfall
0 19.4
1 43.2
2 72.2
3 25.3
4 89.3
... ...
8755 114.9
8756 101.2
8757 20.4
8758 93.2
8759 46.9

8760 rows × 1 columns

# Define model: a stack of fully connected layers ending in a single output
# unit with no activation. The input width is taken from the feature matrix
# instead of being hard-coded, so it follows the number of one-hot columns.
model = Sequential()
model.add(Dense(16, input_dim=x.shape[1], activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(16, activation="relu"))
model.add(Dense(1))

# Cast the data that is actually passed to fit() to float32. The raw `df`
# columns are not model inputs, and 9-digit station ids are not exactly
# representable in float32, so `df` is left untouched.
x = x.astype('float32')
y = np.asarray(y).astype('float32')

# Report the shapes and dtypes the model expects and produces. Plain loops are
# used because print() is called only for its side effect.
for tensor in model.inputs:
    print(tensor.shape, tensor.dtype)
for tensor in model.outputs:
    print(tensor.shape, tensor.dtype)
for layer in model.layers:
    print(layer.name, layer.input_shape, layer.dtype)
(None, 73) <dtype: 'float32'>
(None, 1) <dtype: 'float32'>
dense (None, 73) float32
dense_1 (None, 16) float32
dense_2 (None, 32) float32
dense_3 (None, 64) float32
dense_4 (None, 32) float32
dense_5 (None, 16) float32
[None, None, None, None, None, None]
# Train with Adam on mean squared error; the same quantity is also reported
# as a metric, which is why both columns of the progress log agree.
model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mean_squared_error"],
)
model.fit(x, y, epochs=100)
Epoch 1/100
274/274 [==============================] - 1s 1ms/step - loss: 1904.0205 - mean_squared_error: 1904.0205
Epoch 2/100
274/274 [==============================] - 0s 1ms/step - loss: 977.0018 - mean_squared_error: 977.0018
Epoch 3/100
274/274 [==============================] - 0s 1ms/step - loss: 930.0125 - mean_squared_error: 930.0125
Epoch 4/100
274/274 [==============================] - 0s 1ms/step - loss: 902.6553 - mean_squared_error: 902.6553
Epoch 5/100
274/274 [==============================] - 0s 1ms/step - loss: 863.2485 - mean_squared_error: 863.2485
Epoch 6/100
274/274 [==============================] - 0s 1ms/step - loss: 811.9504 - mean_squared_error: 811.9504
Epoch 7/100
274/274 [==============================] - 0s 1ms/step - loss: 770.9260 - mean_squared_error: 770.9260
Epoch 8/100
274/274 [==============================] - 0s 1ms/step - loss: 724.6091 - mean_squared_error: 724.6091
Epoch 9/100
274/274 [==============================] - 0s 1ms/step - loss: 692.6209 - mean_squared_error: 692.6209
Epoch 10/100
274/274 [==============================] - 0s 1ms/step - loss: 659.7095 - mean_squared_error: 659.7095
Epoch 11/100
274/274 [==============================] - 0s 1ms/step - loss: 625.7371 - mean_squared_error: 625.7371
Epoch 12/100
274/274 [==============================] - 0s 1ms/step - loss: 602.4116 - mean_squared_error: 602.4116
Epoch 13/100
274/274 [==============================] - 0s 1ms/step - loss: 577.0346 - mean_squared_error: 577.0346
Epoch 14/100
274/274 [==============================] - 0s 1ms/step - loss: 552.9323 - mean_squared_error: 552.9323
Epoch 15/100
274/274 [==============================] - 0s 1ms/step - loss: 529.7372 - mean_squared_error: 529.7372
Epoch 16/100
274/274 [==============================] - 0s 1ms/step - loss: 515.2844 - mean_squared_error: 515.2844
Epoch 17/100
274/274 [==============================] - 0s 1ms/step - loss: 501.1700 - mean_squared_error: 501.1700
Epoch 18/100
274/274 [==============================] - 0s 1ms/step - loss: 489.9219 - mean_squared_error: 489.9219
Epoch 19/100
274/274 [==============================] - 0s 1ms/step - loss: 484.0696 - mean_squared_error: 484.0696
Epoch 20/100
274/274 [==============================] - 0s 1ms/step - loss: 470.3400 - mean_squared_error: 470.3400
Epoch 21/100
274/274 [==============================] - 0s 1ms/step - loss: 459.1194 - mean_squared_error: 459.1194
Epoch 22/100
274/274 [==============================] - 0s 1ms/step - loss: 455.5881 - mean_squared_error: 455.5881
Epoch 23/100
274/274 [==============================] - 0s 1ms/step - loss: 446.4247 - mean_squared_error: 446.4247
Epoch 24/100
274/274 [==============================] - 0s 1ms/step - loss: 440.6260 - mean_squared_error: 440.6260
Epoch 25/100
274/274 [==============================] - 0s 1ms/step - loss: 434.9443 - mean_squared_error: 434.9443
Epoch 26/100
274/274 [==============================] - 0s 1ms/step - loss: 429.9223 - mean_squared_error: 429.9223
Epoch 27/100
274/274 [==============================] - 0s 1ms/step - loss: 424.0781 - mean_squared_error: 424.0781
Epoch 28/100
274/274 [==============================] - 0s 1ms/step - loss: 420.9750 - mean_squared_error: 420.9750
Epoch 29/100
274/274 [==============================] - 0s 1ms/step - loss: 416.1357 - mean_squared_error: 416.1357
Epoch 30/100
274/274 [==============================] - 0s 1ms/step - loss: 409.1339 - mean_squared_error: 409.1339
Epoch 31/100
274/274 [==============================] - 0s 1ms/step - loss: 404.7644 - mean_squared_error: 404.7644
Epoch 32/100
274/274 [==============================] - 0s 1ms/step - loss: 403.4354 - mean_squared_error: 403.4354
Epoch 33/100
274/274 [==============================] - 0s 1ms/step - loss: 398.6223 - mean_squared_error: 398.6223
Epoch 34/100
274/274 [==============================] - 0s 1ms/step - loss: 391.9509 - mean_squared_error: 391.9509
Epoch 35/100
274/274 [==============================] - 0s 1ms/step - loss: 391.3186 - mean_squared_error: 391.3186
Epoch 36/100
274/274 [==============================] - 0s 1ms/step - loss: 388.1175 - mean_squared_error: 388.1175
Epoch 37/100
274/274 [==============================] - 0s 1ms/step - loss: 385.9730 - mean_squared_error: 385.9730
Epoch 38/100
274/274 [==============================] - 0s 1ms/step - loss: 382.0468 - mean_squared_error: 382.0468
Epoch 39/100
274/274 [==============================] - 0s 1ms/step - loss: 376.9197 - mean_squared_error: 376.9197
Epoch 40/100
274/274 [==============================] - 0s 1ms/step - loss: 378.0434 - mean_squared_error: 378.0434
Epoch 41/100
274/274 [==============================] - 0s 1ms/step - loss: 372.7451 - mean_squared_error: 372.7451
Epoch 42/100
274/274 [==============================] - 0s 1ms/step - loss: 368.2292 - mean_squared_error: 368.2292
Epoch 43/100
274/274 [==============================] - 0s 1ms/step - loss: 369.8233 - mean_squared_error: 369.8233
Epoch 44/100
274/274 [==============================] - 0s 1ms/step - loss: 365.3695 - mean_squared_error: 365.3695
Epoch 45/100
274/274 [==============================] - 0s 1ms/step - loss: 363.1947 - mean_squared_error: 363.1947
Epoch 46/100
274/274 [==============================] - 0s 1ms/step - loss: 358.6509 - mean_squared_error: 358.6509
Epoch 47/100
274/274 [==============================] - 0s 1ms/step - loss: 363.4928 - mean_squared_error: 363.4928
Epoch 48/100
274/274 [==============================] - 0s 1ms/step - loss: 359.9735 - mean_squared_error: 359.9735
Epoch 49/100
274/274 [==============================] - 0s 1ms/step - loss: 353.2738 - mean_squared_error: 353.2738
Epoch 50/100
274/274 [==============================] - 0s 1ms/step - loss: 350.3524 - mean_squared_error: 350.3524
Epoch 51/100
274/274 [==============================] - 0s 1ms/step - loss: 349.1338 - mean_squared_error: 349.1338
Epoch 52/100
274/274 [==============================] - 0s 1ms/step - loss: 351.0474 - mean_squared_error: 351.0474
Epoch 53/100
274/274 [==============================] - 0s 1ms/step - loss: 341.8802 - mean_squared_error: 341.8802
Epoch 54/100
274/274 [==============================] - 0s 1ms/step - loss: 341.5201 - mean_squared_error: 341.5201
Epoch 55/100
274/274 [==============================] - 0s 1ms/step - loss: 339.8927 - mean_squared_error: 339.8927
Epoch 56/100
274/274 [==============================] - 0s 1ms/step - loss: 337.5977 - mean_squared_error: 337.5977
Epoch 57/100
274/274 [==============================] - 0s 1ms/step - loss: 341.8250 - mean_squared_error: 341.8250
Epoch 58/100
274/274 [==============================] - 0s 1ms/step - loss: 334.7910 - mean_squared_error: 334.7910
Epoch 59/100
274/274 [==============================] - 0s 1ms/step - loss: 333.3398 - mean_squared_error: 333.3398
Epoch 60/100
274/274 [==============================] - 0s 1ms/step - loss: 330.1293 - mean_squared_error: 330.1293
Epoch 61/100
274/274 [==============================] - 0s 1ms/step - loss: 331.5085 - mean_squared_error: 331.5085
Epoch 62/100
274/274 [==============================] - 0s 1ms/step - loss: 327.4076 - mean_squared_error: 327.4076
Epoch 63/100
274/274 [==============================] - 0s 1ms/step - loss: 328.1978 - mean_squared_error: 328.1978
Epoch 64/100
274/274 [==============================] - 0s 1ms/step - loss: 322.5495 - mean_squared_error: 322.5495
Epoch 65/100
274/274 [==============================] - 0s 1ms/step - loss: 324.4060 - mean_squared_error: 324.4060
Epoch 66/100
274/274 [==============================] - 0s 1ms/step - loss: 319.2129 - mean_squared_error: 319.2129
Epoch 67/100
274/274 [==============================] - 0s 1ms/step - loss: 320.8315 - mean_squared_error: 320.8315
Epoch 68/100
274/274 [==============================] - 0s 1ms/step - loss: 315.9987 - mean_squared_error: 315.9987
Epoch 69/100
274/274 [==============================] - 0s 1ms/step - loss: 314.6494 - mean_squared_error: 314.6494
Epoch 70/100
274/274 [==============================] - 0s 1ms/step - loss: 310.7572 - mean_squared_error: 310.7572
Epoch 71/100
274/274 [==============================] - 0s 1ms/step - loss: 310.8293 - mean_squared_error: 310.8293
Epoch 72/100
274/274 [==============================] - 0s 1ms/step - loss: 310.2863 - mean_squared_error: 310.2863
Epoch 73/100
274/274 [==============================] - 0s 1ms/step - loss: 309.2907 - mean_squared_error: 309.2907
Epoch 74/100
274/274 [==============================] - 0s 1ms/step - loss: 306.9155 - mean_squared_error: 306.9155
Epoch 75/100
274/274 [==============================] - 0s 1ms/step - loss: 304.8138 - mean_squared_error: 304.8138
Epoch 76/100
274/274 [==============================] - 0s 1ms/step - loss: 303.4693 - mean_squared_error: 303.4693
Epoch 77/100
274/274 [==============================] - 0s 1ms/step - loss: 302.1253 - mean_squared_error: 302.1253
Epoch 78/100
274/274 [==============================] - 0s 1ms/step - loss: 300.5882 - mean_squared_error: 300.5882
Epoch 79/100
274/274 [==============================] - 0s 1ms/step - loss: 300.8849 - mean_squared_error: 300.8849
Epoch 80/100
274/274 [==============================] - 0s 1ms/step - loss: 297.9424 - mean_squared_error: 297.9424
Epoch 81/100
274/274 [==============================] - 0s 1ms/step - loss: 296.6845 - mean_squared_error: 296.6845
Epoch 82/100
274/274 [==============================] - 0s 1ms/step - loss: 301.2429 - mean_squared_error: 301.2429
Epoch 83/100
274/274 [==============================] - 0s 1ms/step - loss: 294.7325 - mean_squared_error: 294.7325
Epoch 84/100
274/274 [==============================] - 0s 1ms/step - loss: 293.9087 - mean_squared_error: 293.9087
Epoch 85/100
274/274 [==============================] - 0s 1ms/step - loss: 294.8573 - mean_squared_error: 294.8573
Epoch 86/100
274/274 [==============================] - 0s 1ms/step - loss: 291.5350 - mean_squared_error: 291.5350
Epoch 87/100
274/274 [==============================] - 0s 1ms/step - loss: 288.5298 - mean_squared_error: 288.5298
Epoch 88/100
274/274 [==============================] - 0s 1ms/step - loss: 290.0951 - mean_squared_error: 290.0951
Epoch 89/100
274/274 [==============================] - 0s 1ms/step - loss: 286.3828 - mean_squared_error: 286.3828
Epoch 90/100
274/274 [==============================] - 0s 1ms/step - loss: 282.4638 - mean_squared_error: 282.4638
Epoch 91/100
274/274 [==============================] - 0s 1ms/step - loss: 290.5275 - mean_squared_error: 290.5275
Epoch 92/100
274/274 [==============================] - 0s 1ms/step - loss: 282.0305 - mean_squared_error: 282.0305
Epoch 93/100
274/274 [==============================] - 0s 1ms/step - loss: 281.5406 - mean_squared_error: 281.5406
Epoch 94/100
274/274 [==============================] - 0s 1ms/step - loss: 287.6223 - mean_squared_error: 287.6223
Epoch 95/100
274/274 [==============================] - 0s 1ms/step - loss: 277.7972 - mean_squared_error: 277.7972
Epoch 96/100
274/274 [==============================] - 0s 1ms/step - loss: 279.9403 - mean_squared_error: 279.9403
Epoch 97/100
274/274 [==============================] - 0s 1ms/step - loss: 275.0088 - mean_squared_error: 275.0088
Epoch 98/100
274/274 [==============================] - 0s 1ms/step - loss: 276.8479 - mean_squared_error: 276.8479
Epoch 99/100
274/274 [==============================] - 0s 1ms/step - loss: 275.8300 - mean_squared_error: 275.8300
Epoch 100/100
274/274 [==============================] - 0s 1ms/step - loss: 274.4589 - mean_squared_error: 274.4589
<keras.callbacks.History at 0x2ae269e2610>
import math

# Training RMSE computed from the fitted model itself. The previous version
# hard-coded an MSE value (298.7904) that did not match the loss of the final
# epoch, so the reported number went stale whenever the model was retrained.
math.sqrt(mean_squared_error(y, model.predict(x)))
17.28555466278129
# Re-read the test rows and append the training rows after them, so the
# one-hot encoding of the test set sees every value present in either file.
x_test = pd.read_csv('test-A/in.tsv', names=in_columns, sep='\t')
df_train = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
x_test = pd.concat(objs=[x_test, df_train], axis=0)
len(x_test)
9480
# Discard the two columns that are not one-hot encoded.
x_test = x_test.drop(columns=['nazwa_stacji', 'typ_zbioru'])
len(x_test)
9480
# Expand the listed columns into one indicator column per distinct value.
x_test = pd.get_dummies(data=x_test, columns=['id_stacji', 'rok', 'miesiąc'])
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# The training rows were appended after the test rows, so removing the last
# len(df_train) rows leaves only the test part. Deriving the count from
# df_train avoids a hard-coded row count, and the explicit end position keeps
# every row when df_train is empty (x_test.iloc[:-0] would select nothing).
x_test = x_test.iloc[:len(x_test) - len(df_train)]
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

720 rows × 73 columns

# Predict for the encoded test rows.
pred = model.predict(x_test)
23/23 [==============================] - 0s 909us/step
# Predict again and write the predictions as tab-separated text, without a
# header row or an index column.
pred = model.predict(x_test)
out = pd.DataFrame(data=pred)
out.to_csv('test-A/out.tsv', index=False, header=False, sep='\t')
23/23 [==============================] - 0s 955us/step