precipitation-pl/solution.ipynb
2022-05-22 20:26:09 +02:00

81 KiB
Raw Blame History

# Import required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense
# Column names for the input TSV files; passed via `names=` on every read.
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']

# Training features.
df = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
df.shape[0]
8760
# Test-A features, read with the same column layout as the training file.
df_test = pd.read_csv('test-A/in.tsv', sep='\t', names=in_columns)
df_test.shape[0]
720
# Stack train and test rows (train first) so that both are one-hot encoded
# against the same set of categories. Row labels are kept as-is, so the
# appended test rows restart at 0 instead of continuing the training labels.
df = pd.concat((df, df_test))
df.shape[0]
9480
# Discard the two columns that are not used as features, then one-hot
# encode the three remaining ones.
df = df.drop(columns=['nazwa_stacji', 'typ_zbioru'])
x = pd.get_dummies(df, columns=['id_stacji', 'rok', 'miesiąc'])
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# Keep only the training rows. The test rows were appended last, so they are
# the trailing len(df_test) rows; deriving the count from df_test avoids a
# magic number that silently goes stale when the test set changes.
# An explicit end position is used instead of a negative offset because
# `iloc[:-0]` would return an empty frame if the test set were empty.
x = x.iloc[:len(x) - len(df_test)]
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

8760 rows × 73 columns

# Training targets, read into a single column labelled 'rainfall'.
y = pd.read_csv('train/expected.tsv', names=['rainfall'], sep='\t')
y
rainfall
0 19.4
1 43.2
2 72.2
3 25.3
4 89.3
... ...
8755 114.9
8756 101.2
8757 20.4
8758 93.2
8759 46.9

8760 rows × 1 columns

# Define model: a fully connected regression network whose hidden layers
# halve in width from 1024 down to 16, ending in a single output unit with
# no activation.
model = Sequential()
# Take the input width from the encoded features instead of hard-coding 73,
# so the model still builds if the set of one-hot columns changes.
model.add(Dense(1024, input_dim=x.shape[1], activation="relu"))
for units in (512, 256, 128, 64, 32, 16):
    model.add(Dense(units, activation="relu"))
model.add(Dense(1))

# Cast what is actually passed to fit() to float32. Previously the raw
# columns of `df` were cast here, but `df` is never fed to the model: the
# one-hot frame `x` is.
x = x.astype('float32')
y = np.asarray(y).astype('float32')

# Print input/output/layer shapes and dtypes as a sanity check. Plain loops
# are used so the cell does not build and echo a throwaway list of None.
for tensor in model.inputs:
    print(tensor.shape, tensor.dtype)
for tensor in model.outputs:
    print(tensor.shape, tensor.dtype)
for layer in model.layers:
    print(layer.name, layer.input_shape, layer.dtype)
(None, 73) <dtype: 'float32'>
(None, 1) <dtype: 'float32'>
dense_95 (None, 73) float32
dense_96 (None, 1024) float32
dense_97 (None, 512) float32
dense_98 (None, 256) float32
dense_99 (None, 128) float32
dense_100 (None, 64) float32
dense_101 (None, 32) float32
dense_102 (None, 16) float32
[None, None, None, None, None, None, None, None]
# Minimise mean squared error with Adam. The same quantity is also requested
# as a metric, which is why every epoch line reports the value twice.
# NOTE(review): all training rows are used for fitting, so the logged loss is
# training error only — consider holding out a validation set.
model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mean_squared_error"],
)
model.fit(x, y, epochs=100)
Epoch 1/100
274/274 [==============================] - 2s 6ms/step - loss: 1216.5399 - mean_squared_error: 1216.5399
Epoch 2/100
274/274 [==============================] - 2s 6ms/step - loss: 794.1711 - mean_squared_error: 794.1711
Epoch 3/100
274/274 [==============================] - 2s 6ms/step - loss: 580.7461 - mean_squared_error: 580.7461
Epoch 4/100
274/274 [==============================] - 2s 6ms/step - loss: 484.1317 - mean_squared_error: 484.1317
Epoch 5/100
274/274 [==============================] - 2s 6ms/step - loss: 441.7448 - mean_squared_error: 441.7448
Epoch 6/100
274/274 [==============================] - 2s 6ms/step - loss: 392.2047 - mean_squared_error: 392.2047
Epoch 7/100
274/274 [==============================] - 2s 6ms/step - loss: 361.4105 - mean_squared_error: 361.4105
Epoch 8/100
274/274 [==============================] - 2s 6ms/step - loss: 312.9633 - mean_squared_error: 312.9633
Epoch 9/100
274/274 [==============================] - 2s 7ms/step - loss: 275.2529 - mean_squared_error: 275.2529
Epoch 10/100
274/274 [==============================] - 2s 6ms/step - loss: 246.7625 - mean_squared_error: 246.7625
Epoch 11/100
274/274 [==============================] - 2s 6ms/step - loss: 195.6685 - mean_squared_error: 195.6685
Epoch 12/100
274/274 [==============================] - 2s 6ms/step - loss: 168.8491 - mean_squared_error: 168.8491
Epoch 13/100
274/274 [==============================] - 2s 7ms/step - loss: 150.1201 - mean_squared_error: 150.1201
Epoch 14/100
274/274 [==============================] - 2s 7ms/step - loss: 122.6171 - mean_squared_error: 122.6171
Epoch 15/100
274/274 [==============================] - 2s 6ms/step - loss: 100.8923 - mean_squared_error: 100.8923
Epoch 16/100
274/274 [==============================] - 2s 6ms/step - loss: 87.8484 - mean_squared_error: 87.8484
Epoch 17/100
274/274 [==============================] - 2s 6ms/step - loss: 77.6876 - mean_squared_error: 77.6876
Epoch 18/100
274/274 [==============================] - 2s 6ms/step - loss: 63.2032 - mean_squared_error: 63.2032
Epoch 19/100
274/274 [==============================] - 2s 6ms/step - loss: 57.2543 - mean_squared_error: 57.2543
Epoch 20/100
274/274 [==============================] - 2s 6ms/step - loss: 45.0924 - mean_squared_error: 45.0924
Epoch 21/100
274/274 [==============================] - 2s 6ms/step - loss: 49.1593 - mean_squared_error: 49.1593
Epoch 22/100
274/274 [==============================] - 2s 7ms/step - loss: 58.2306 - mean_squared_error: 58.2306
Epoch 23/100
274/274 [==============================] - 2s 6ms/step - loss: 48.0242 - mean_squared_error: 48.0242
Epoch 24/100
274/274 [==============================] - 2s 6ms/step - loss: 38.6356 - mean_squared_error: 38.6356
Epoch 25/100
274/274 [==============================] - 2s 6ms/step - loss: 30.9926 - mean_squared_error: 30.9926
Epoch 26/100
274/274 [==============================] - 2s 6ms/step - loss: 29.7819 - mean_squared_error: 29.7819
Epoch 27/100
274/274 [==============================] - 2s 6ms/step - loss: 32.5139 - mean_squared_error: 32.5139
Epoch 28/100
274/274 [==============================] - 2s 6ms/step - loss: 40.1129 - mean_squared_error: 40.1129
Epoch 29/100
274/274 [==============================] - 2s 6ms/step - loss: 51.6793 - mean_squared_error: 51.6793
Epoch 30/100
274/274 [==============================] - 2s 6ms/step - loss: 37.1284 - mean_squared_error: 37.1284
Epoch 31/100
274/274 [==============================] - 2s 5ms/step - loss: 30.2074 - mean_squared_error: 30.2074
Epoch 32/100
274/274 [==============================] - 2s 6ms/step - loss: 27.1982 - mean_squared_error: 27.1982
Epoch 33/100
274/274 [==============================] - 2s 7ms/step - loss: 26.5477 - mean_squared_error: 26.5477
Epoch 34/100
274/274 [==============================] - 2s 6ms/step - loss: 25.7544 - mean_squared_error: 25.7544
Epoch 35/100
274/274 [==============================] - 2s 6ms/step - loss: 24.1754 - mean_squared_error: 24.1754
Epoch 36/100
274/274 [==============================] - 2s 5ms/step - loss: 27.5213 - mean_squared_error: 27.5213
Epoch 37/100
274/274 [==============================] - 2s 5ms/step - loss: 30.3435 - mean_squared_error: 30.3435
Epoch 38/100
274/274 [==============================] - 2s 5ms/step - loss: 32.7374 - mean_squared_error: 32.7374
Epoch 39/100
274/274 [==============================] - 2s 6ms/step - loss: 29.2545 - mean_squared_error: 29.2545
Epoch 40/100
274/274 [==============================] - 2s 6ms/step - loss: 28.4834 - mean_squared_error: 28.4834
Epoch 41/100
274/274 [==============================] - 2s 6ms/step - loss: 22.9177 - mean_squared_error: 22.9177
Epoch 42/100
274/274 [==============================] - 2s 6ms/step - loss: 21.6796 - mean_squared_error: 21.6796
Epoch 43/100
274/274 [==============================] - 2s 6ms/step - loss: 20.2429 - mean_squared_error: 20.2429
Epoch 44/100
274/274 [==============================] - 2s 6ms/step - loss: 21.2112 - mean_squared_error: 21.2112
Epoch 45/100
274/274 [==============================] - 2s 5ms/step - loss: 25.0341 - mean_squared_error: 25.0341
Epoch 46/100
274/274 [==============================] - 2s 6ms/step - loss: 22.3963 - mean_squared_error: 22.3963
Epoch 47/100
274/274 [==============================] - 2s 6ms/step - loss: 23.1122 - mean_squared_error: 23.1122
Epoch 48/100
274/274 [==============================] - 2s 6ms/step - loss: 28.0343 - mean_squared_error: 28.0343
Epoch 49/100
274/274 [==============================] - 2s 6ms/step - loss: 22.2908 - mean_squared_error: 22.2908
Epoch 50/100
274/274 [==============================] - 2s 6ms/step - loss: 21.7871 - mean_squared_error: 21.7871
Epoch 51/100
274/274 [==============================] - 2s 6ms/step - loss: 19.8841 - mean_squared_error: 19.8841
Epoch 52/100
274/274 [==============================] - 2s 6ms/step - loss: 20.5390 - mean_squared_error: 20.5390
Epoch 53/100
274/274 [==============================] - 2s 5ms/step - loss: 22.3869 - mean_squared_error: 22.3869
Epoch 54/100
274/274 [==============================] - 2s 6ms/step - loss: 20.6540 - mean_squared_error: 20.6540
Epoch 55/100
274/274 [==============================] - 2s 6ms/step - loss: 18.3056 - mean_squared_error: 18.3056
Epoch 56/100
274/274 [==============================] - 2s 6ms/step - loss: 22.7574 - mean_squared_error: 22.7574
Epoch 57/100
274/274 [==============================] - 2s 6ms/step - loss: 20.1425 - mean_squared_error: 20.1425
Epoch 58/100
274/274 [==============================] - 2s 6ms/step - loss: 17.5521 - mean_squared_error: 17.5521
Epoch 59/100
274/274 [==============================] - 2s 6ms/step - loss: 18.2735 - mean_squared_error: 18.2735
Epoch 60/100
274/274 [==============================] - 2s 6ms/step - loss: 17.6372 - mean_squared_error: 17.6372
Epoch 61/100
274/274 [==============================] - 2s 6ms/step - loss: 15.2790 - mean_squared_error: 15.2790
Epoch 62/100
274/274 [==============================] - 2s 6ms/step - loss: 12.9527 - mean_squared_error: 12.9527
Epoch 63/100
274/274 [==============================] - 2s 6ms/step - loss: 13.2732 - mean_squared_error: 13.2732
Epoch 64/100
274/274 [==============================] - 2s 7ms/step - loss: 18.0740 - mean_squared_error: 18.0740
Epoch 65/100
274/274 [==============================] - 2s 6ms/step - loss: 23.5823 - mean_squared_error: 23.5823
Epoch 66/100
274/274 [==============================] - 2s 6ms/step - loss: 22.4731 - mean_squared_error: 22.4731
Epoch 67/100
274/274 [==============================] - 2s 6ms/step - loss: 17.0889 - mean_squared_error: 17.0889
Epoch 68/100
274/274 [==============================] - 2s 6ms/step - loss: 13.5507 - mean_squared_error: 13.5507
Epoch 69/100
274/274 [==============================] - 2s 6ms/step - loss: 14.6270 - mean_squared_error: 14.6270
Epoch 70/100
274/274 [==============================] - 2s 6ms/step - loss: 15.7420 - mean_squared_error: 15.7420
Epoch 71/100
274/274 [==============================] - 2s 6ms/step - loss: 15.6920 - mean_squared_error: 15.6920
Epoch 72/100
274/274 [==============================] - 2s 6ms/step - loss: 17.8469 - mean_squared_error: 17.8469
Epoch 73/100
274/274 [==============================] - 2s 6ms/step - loss: 20.0690 - mean_squared_error: 20.0690
Epoch 74/100
274/274 [==============================] - 2s 6ms/step - loss: 16.4538 - mean_squared_error: 16.4538
Epoch 75/100
274/274 [==============================] - 2s 6ms/step - loss: 13.7226 - mean_squared_error: 13.7226
Epoch 76/100
274/274 [==============================] - 2s 6ms/step - loss: 11.6082 - mean_squared_error: 11.6082
Epoch 77/100
274/274 [==============================] - 2s 6ms/step - loss: 11.4206 - mean_squared_error: 11.4206
Epoch 78/100
274/274 [==============================] - 2s 6ms/step - loss: 12.9487 - mean_squared_error: 12.9487
Epoch 79/100
274/274 [==============================] - 2s 7ms/step - loss: 14.9138 - mean_squared_error: 14.9138
Epoch 80/100
274/274 [==============================] - 2s 6ms/step - loss: 16.7601 - mean_squared_error: 16.7601
Epoch 81/100
274/274 [==============================] - 2s 7ms/step - loss: 16.3490 - mean_squared_error: 16.3490
Epoch 82/100
274/274 [==============================] - 2s 6ms/step - loss: 12.4280 - mean_squared_error: 12.4280
Epoch 83/100
274/274 [==============================] - 2s 6ms/step - loss: 9.2046 - mean_squared_error: 9.2046
Epoch 84/100
274/274 [==============================] - 2s 6ms/step - loss: 8.5721 - mean_squared_error: 8.5721
Epoch 85/100
274/274 [==============================] - 2s 7ms/step - loss: 9.8912 - mean_squared_error: 9.8912
Epoch 86/100
274/274 [==============================] - 2s 6ms/step - loss: 10.4523 - mean_squared_error: 10.4523
Epoch 87/100
274/274 [==============================] - 2s 6ms/step - loss: 19.6175 - mean_squared_error: 19.6175
Epoch 88/100
274/274 [==============================] - 2s 6ms/step - loss: 16.5808 - mean_squared_error: 16.5808
Epoch 89/100
274/274 [==============================] - 2s 5ms/step - loss: 15.8564 - mean_squared_error: 15.8564
Epoch 90/100
274/274 [==============================] - 2s 6ms/step - loss: 12.2800 - mean_squared_error: 12.2800
Epoch 91/100
274/274 [==============================] - 2s 6ms/step - loss: 10.0090 - mean_squared_error: 10.0090
Epoch 92/100
274/274 [==============================] - 2s 6ms/step - loss: 9.4647 - mean_squared_error: 9.4647
Epoch 93/100
274/274 [==============================] - 2s 6ms/step - loss: 10.7999 - mean_squared_error: 10.7999
Epoch 94/100
274/274 [==============================] - 2s 6ms/step - loss: 10.2449 - mean_squared_error: 10.2449
Epoch 95/100
274/274 [==============================] - 2s 6ms/step - loss: 10.0525 - mean_squared_error: 10.0525
Epoch 96/100
274/274 [==============================] - 2s 5ms/step - loss: 11.3375 - mean_squared_error: 11.3375
Epoch 97/100
274/274 [==============================] - 2s 6ms/step - loss: 11.6955 - mean_squared_error: 11.6955
Epoch 98/100
274/274 [==============================] - 2s 6ms/step - loss: 11.2546 - mean_squared_error: 11.2546
Epoch 99/100
274/274 [==============================] - 2s 6ms/step - loss: 10.2126 - mean_squared_error: 10.2126
Epoch 100/100
274/274 [==============================] - 2s 6ms/step - loss: 8.5690 - mean_squared_error: 8.5690
<keras.callbacks.History at 0x20e6929d2e0>
# Re-read the raw test and training features and stack them, test rows first,
# so the test set can be encoded against the categories of both files.
x_test = pd.read_csv('test-A/in.tsv', names=in_columns, sep='\t')
df_train = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
x_test = pd.concat((x_test, df_train))
x_test.shape[0]
9480
# Remove the same two unused columns that were removed from the training frame.
x_test = x_test.drop(columns=['nazwa_stacji', 'typ_zbioru'])
x_test.shape[0]
9480
# One-hot encode the same three columns that were encoded for training.
x_test = pd.get_dummies(data=x_test, columns=['id_stacji', 'rok', 'miesiąc'])
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# Keep only the test rows. They were stacked first and are followed by the
# len(df_train) training rows; deriving the count from df_train avoids a
# magic number that silently goes stale when the training set changes.
# An explicit end position is used instead of a negative offset because
# `iloc[:-0]` would return an empty frame if the training set were empty.
x_test = x_test.iloc[:len(x_test) - len(df_train)]
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

720 rows × 73 columns

# Run the trained model on the encoded test rows.
pred = model.predict(x_test)
23/23 [==============================] - 0s 2ms/step
# Write the predictions to the submission file, one row per test row, with no
# header line and no index column.
# `pred` from the preceding predict cell is reused: running model.predict on
# the same model and inputs a second time was redundant.
out = pd.DataFrame(pred)
out.to_csv('test-A/out.tsv', sep='\t', header=False, index=False)
23/23 [==============================] - 0s 2ms/step