precipitation-pl/solution.ipynb
2022-05-23 15:05:18 +02:00

76 KiB
Raw Blame History

# Import required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense
# Column names for the header-less */in.tsv feature files.
in_columns = [
    'id_stacji',
    'nazwa_stacji',
    'typ_zbioru',
    'rok',
    'miesiąc',
]

# Read the train and dev-0 feature tables, then stack them (train rows first).
df = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
df2 = pd.read_csv('dev-0/in.tsv', sep='\t', names=in_columns)
df = pd.concat([df, df2], axis=0)
df.shape[0]
9360
# Read the test-A feature table; its row count is shown as the cell result.
df_test = pd.read_csv('test-A/in.tsv', sep='\t', names=in_columns)
df_test.shape[0]
720
# Append the test-A rows after the rows already held in `df`, so the combined
# frame ends with the test split.
df = pd.concat([df, df_test], axis=0)
df.shape[0]
10080
# Discard the station-name and dataset-type columns; they are not encoded.
df = df.drop(columns=['nazwa_stacji', 'typ_zbioru'])
# One-hot encode the station id, year and month columns.
x = pd.get_dummies(df, columns=['id_stacji', 'rok', 'miesiąc'])
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

10080 rows × 73 columns

# Strip the test-A rows, which were appended last, so that only the train and
# dev-0 rows remain for fitting. The number of rows to remove is taken from
# df_test instead of a literal 720 so the slice stays correct if the test
# split changes size (and removes nothing if the test split is empty).
x = x.iloc[:len(x) - len(df_test)]
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
595 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
596 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
597 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
598 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
599 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9360 rows × 73 columns

# Load the expected rainfall values for train and dev-0 and stack them in the
# same order (train first, then dev-0).
y = pd.read_csv('train/expected.tsv', names=['rainfall'], sep='\t')
y2 = pd.read_csv('dev-0/expected.tsv', names=['rainfall'], sep='\t')
y = pd.concat([y, y2], axis=0)
# Fully connected regression network: 73 inputs, eight ReLU hidden layers
# whose width halves from 2048 down to 16, and a single output unit with no
# activation.
model = Sequential()
model.add(Dense(2048, input_dim=73, activation="relu"))
for units in (1024, 512, 256, 128, 64, 32, 16):
    model.add(Dense(units, activation="relu"))
model.add(Dense(1))
# model.summary()  # uncomment to print the model summary
# NOTE: these casts modify only `df`. The feature matrix `x` was already built
# from `df` by get_dummies, so they do not change what the model is trained
# on. float32 also cannot represent nine-digit station ids such as 249180010
# exactly, so `df['id_stacji']` is approximate after this point.
df['id_stacji'] = np.asarray(df['id_stacji']).astype('float32')
df['rok'] = np.asarray(df['rok']).astype('float32')
df['miesiąc'] = np.asarray(df['miesiąc']).astype('float32')

# Cast the data actually passed to model.fit to float32, matching the dtype
# the model reports for its inputs and outputs.
x = x.astype('float32')
y = np.asarray(y).astype('float32')

# Print the shapes and dtypes of the model's inputs, outputs and layers.
# Plain loops are used because the prints are side effects; a list
# comprehension would build (and display) a useless list of None values.
for tensor in model.inputs:
    print(tensor.shape, tensor.dtype)
for tensor in model.outputs:
    print(tensor.shape, tensor.dtype)
for layer in model.layers:
    print(layer.name, layer.input_shape, layer.dtype)
(None, 73) <dtype: 'float32'>
(None, 1) <dtype: 'float32'>
dense_26 (None, 73) float32
dense_27 (None, 2048) float32
dense_28 (None, 1024) float32
dense_29 (None, 512) float32
dense_30 (None, 256) float32
dense_31 (None, 128) float32
dense_32 (None, 64) float32
dense_33 (None, 32) float32
dense_34 (None, 16) float32
[None, None, None, None, None, None, None, None, None]
# Minimise mean squared error with the Adam optimizer (the same quantity is
# also reported as a metric), then train for 80 epochs on all of x / y.
model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mean_squared_error"],
)
model.fit(x, y, epochs=80)
Epoch 1/80
293/293 [==============================] - 6s 17ms/step - loss: 1134.1598 - mean_squared_error: 1134.1598
Epoch 2/80
293/293 [==============================] - 5s 16ms/step - loss: 714.3663 - mean_squared_error: 714.3663
Epoch 3/80
293/293 [==============================] - 5s 17ms/step - loss: 530.2103 - mean_squared_error: 530.2103
Epoch 4/80
293/293 [==============================] - 5s 16ms/step - loss: 466.3124 - mean_squared_error: 466.3124
Epoch 5/80
293/293 [==============================] - 5s 16ms/step - loss: 408.9340 - mean_squared_error: 408.9340
Epoch 6/80
293/293 [==============================] - 5s 17ms/step - loss: 376.8569 - mean_squared_error: 376.8569
Epoch 7/80
293/293 [==============================] - 5s 17ms/step - loss: 306.2373 - mean_squared_error: 306.2373
Epoch 8/80
293/293 [==============================] - 5s 16ms/step - loss: 265.6877 - mean_squared_error: 265.6877
Epoch 9/80
293/293 [==============================] - 5s 17ms/step - loss: 232.4935 - mean_squared_error: 232.4935
Epoch 10/80
293/293 [==============================] - 5s 18ms/step - loss: 190.4526 - mean_squared_error: 190.4526
Epoch 11/80
293/293 [==============================] - 5s 18ms/step - loss: 145.0189 - mean_squared_error: 145.0189
Epoch 12/80
293/293 [==============================] - 5s 16ms/step - loss: 119.3220 - mean_squared_error: 119.3220
Epoch 13/80
293/293 [==============================] - 5s 16ms/step - loss: 91.1009 - mean_squared_error: 91.1009
Epoch 14/80
293/293 [==============================] - 5s 16ms/step - loss: 74.9345 - mean_squared_error: 74.9345
Epoch 15/80
293/293 [==============================] - 5s 17ms/step - loss: 60.5697 - mean_squared_error: 60.5697
Epoch 16/80
293/293 [==============================] - 5s 18ms/step - loss: 60.6215 - mean_squared_error: 60.6215
Epoch 17/80
293/293 [==============================] - 5s 18ms/step - loss: 53.4988 - mean_squared_error: 53.4988
Epoch 18/80
293/293 [==============================] - 5s 16ms/step - loss: 46.9713 - mean_squared_error: 46.9713
Epoch 19/80
293/293 [==============================] - 5s 16ms/step - loss: 43.6367 - mean_squared_error: 43.6367
Epoch 20/80
293/293 [==============================] - 5s 16ms/step - loss: 43.7172 - mean_squared_error: 43.7172
Epoch 21/80
293/293 [==============================] - 5s 17ms/step - loss: 38.5771 - mean_squared_error: 38.5771
Epoch 22/80
293/293 [==============================] - 5s 16ms/step - loss: 39.4714 - mean_squared_error: 39.4714
Epoch 23/80
293/293 [==============================] - 5s 16ms/step - loss: 40.1202 - mean_squared_error: 40.1202
Epoch 24/80
293/293 [==============================] - 5s 16ms/step - loss: 53.1920 - mean_squared_error: 53.1920
Epoch 25/80
293/293 [==============================] - 5s 16ms/step - loss: 45.4968 - mean_squared_error: 45.4968
Epoch 26/80
293/293 [==============================] - 5s 16ms/step - loss: 34.6478 - mean_squared_error: 34.6478
Epoch 27/80
293/293 [==============================] - 5s 16ms/step - loss: 29.7110 - mean_squared_error: 29.7110
Epoch 28/80
293/293 [==============================] - 5s 16ms/step - loss: 24.7331 - mean_squared_error: 24.7331
Epoch 29/80
293/293 [==============================] - 5s 16ms/step - loss: 31.2403 - mean_squared_error: 31.2403
Epoch 30/80
293/293 [==============================] - 5s 16ms/step - loss: 28.0005 - mean_squared_error: 28.0005
Epoch 31/80
293/293 [==============================] - 5s 16ms/step - loss: 29.0533 - mean_squared_error: 29.0533
Epoch 32/80
293/293 [==============================] - 5s 16ms/step - loss: 30.9709 - mean_squared_error: 30.9709
Epoch 33/80
293/293 [==============================] - 5s 16ms/step - loss: 27.8636 - mean_squared_error: 27.8636
Epoch 34/80
293/293 [==============================] - 5s 17ms/step - loss: 38.6768 - mean_squared_error: 38.6768
Epoch 35/80
293/293 [==============================] - 5s 16ms/step - loss: 36.2994 - mean_squared_error: 36.2994
Epoch 36/80
293/293 [==============================] - 5s 16ms/step - loss: 32.9632 - mean_squared_error: 32.9632
Epoch 37/80
293/293 [==============================] - 5s 16ms/step - loss: 34.0196 - mean_squared_error: 34.0196
Epoch 38/80
293/293 [==============================] - 5s 16ms/step - loss: 27.4301 - mean_squared_error: 27.4301
Epoch 39/80
293/293 [==============================] - 5s 16ms/step - loss: 21.3594 - mean_squared_error: 21.3594
Epoch 40/80
293/293 [==============================] - 5s 16ms/step - loss: 15.9413 - mean_squared_error: 15.9413
Epoch 41/80
293/293 [==============================] - 5s 17ms/step - loss: 21.4223 - mean_squared_error: 21.4223
Epoch 42/80
293/293 [==============================] - 5s 17ms/step - loss: 24.0689 - mean_squared_error: 24.0689
Epoch 43/80
293/293 [==============================] - 5s 16ms/step - loss: 21.8016 - mean_squared_error: 21.8016
Epoch 44/80
293/293 [==============================] - 5s 16ms/step - loss: 22.8678 - mean_squared_error: 22.8678
Epoch 45/80
293/293 [==============================] - 5s 16ms/step - loss: 19.4661 - mean_squared_error: 19.4661
Epoch 46/80
293/293 [==============================] - 5s 16ms/step - loss: 21.0602 - mean_squared_error: 21.0602
Epoch 47/80
293/293 [==============================] - 5s 16ms/step - loss: 21.4916 - mean_squared_error: 21.4916
Epoch 48/80
293/293 [==============================] - 5s 16ms/step - loss: 24.5567 - mean_squared_error: 24.5567
Epoch 49/80
293/293 [==============================] - 5s 16ms/step - loss: 23.9477 - mean_squared_error: 23.9477
Epoch 50/80
293/293 [==============================] - 5s 16ms/step - loss: 21.6010 - mean_squared_error: 21.6010
Epoch 51/80
293/293 [==============================] - 5s 18ms/step - loss: 19.9157 - mean_squared_error: 19.9157
Epoch 52/80
293/293 [==============================] - 6s 19ms/step - loss: 21.2413 - mean_squared_error: 21.2413
Epoch 53/80
293/293 [==============================] - 6s 19ms/step - loss: 23.5774 - mean_squared_error: 23.5774
Epoch 54/80
293/293 [==============================] - 5s 17ms/step - loss: 20.9708 - mean_squared_error: 20.9708
Epoch 55/80
293/293 [==============================] - 5s 16ms/step - loss: 16.7699 - mean_squared_error: 16.7699
Epoch 56/80
293/293 [==============================] - 5s 16ms/step - loss: 11.5884 - mean_squared_error: 11.5884
Epoch 57/80
293/293 [==============================] - 5s 16ms/step - loss: 11.2608 - mean_squared_error: 11.2608
Epoch 58/80
293/293 [==============================] - 5s 16ms/step - loss: 13.6555 - mean_squared_error: 13.6555
Epoch 59/80
293/293 [==============================] - 5s 16ms/step - loss: 16.4050 - mean_squared_error: 16.4050
Epoch 60/80
293/293 [==============================] - 5s 16ms/step - loss: 23.0564 - mean_squared_error: 23.0564
Epoch 61/80
293/293 [==============================] - 5s 16ms/step - loss: 28.0808 - mean_squared_error: 28.0808
Epoch 62/80
293/293 [==============================] - 5s 16ms/step - loss: 19.2690 - mean_squared_error: 19.2690
Epoch 63/80
293/293 [==============================] - 5s 16ms/step - loss: 14.3920 - mean_squared_error: 14.3920
Epoch 64/80
293/293 [==============================] - 5s 16ms/step - loss: 12.5167 - mean_squared_error: 12.5167
Epoch 65/80
293/293 [==============================] - 5s 16ms/step - loss: 14.2031 - mean_squared_error: 14.2031
Epoch 66/80
293/293 [==============================] - 5s 16ms/step - loss: 13.2670 - mean_squared_error: 13.2670
Epoch 67/80
293/293 [==============================] - 5s 17ms/step - loss: 15.0297 - mean_squared_error: 15.0297
Epoch 68/80
293/293 [==============================] - 5s 16ms/step - loss: 16.3161 - mean_squared_error: 16.3161
Epoch 69/80
293/293 [==============================] - 5s 16ms/step - loss: 14.6367 - mean_squared_error: 14.6367
Epoch 70/80
293/293 [==============================] - 5s 16ms/step - loss: 12.4564 - mean_squared_error: 12.4564
Epoch 71/80
293/293 [==============================] - 5s 16ms/step - loss: 11.4511 - mean_squared_error: 11.4511
Epoch 72/80
293/293 [==============================] - 5s 17ms/step - loss: 14.1012 - mean_squared_error: 14.1012
Epoch 73/80
293/293 [==============================] - 5s 16ms/step - loss: 15.6135 - mean_squared_error: 15.6135
Epoch 74/80
293/293 [==============================] - 5s 16ms/step - loss: 16.4932 - mean_squared_error: 16.4932
Epoch 75/80
293/293 [==============================] - 5s 16ms/step - loss: 12.8654 - mean_squared_error: 12.8654
Epoch 76/80
293/293 [==============================] - 5s 16ms/step - loss: 10.6150 - mean_squared_error: 10.6150
Epoch 77/80
293/293 [==============================] - 5s 16ms/step - loss: 11.0828 - mean_squared_error: 11.0828
Epoch 78/80
293/293 [==============================] - 5s 16ms/step - loss: 12.4208 - mean_squared_error: 12.4208
Epoch 79/80
293/293 [==============================] - 5s 16ms/step - loss: 13.5073 - mean_squared_error: 13.5073
Epoch 80/80
293/293 [==============================] - 5s 16ms/step - loss: 13.8812 - mean_squared_error: 13.8812
<keras.callbacks.History at 0x1b6116d01c0>
# Read the test-A features again and append the train rows after them; the
# test rows therefore occupy the front of the combined frame.
x_test = pd.read_csv('test-A/in.tsv', names=in_columns, sep='\t')
df_train = pd.read_csv('train/in.tsv', names=in_columns, sep='\t')
x_test = pd.concat([x_test, df_train], axis=0)
x_test.shape[0]
9480
# Discard the station-name and dataset-type columns; they are not encoded.
x_test = x_test.drop(columns=['nazwa_stacji', 'typ_zbioru'])
x_test.shape[0]
9480
# One-hot encode the station id, year and month columns of the combined
# test + train frame.
x_test = pd.get_dummies(
    x_test,
    columns=['id_stacji', 'rok', 'miesiąc'],
)
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# Keep only the test-A rows, which sit at the front of the frame; the train
# rows were appended after them before get_dummies was applied.
# Exactly len(df_train) rows are removed. The previous hard-coded `[:-9360]`
# removed more rows than had been appended (9360 is the train + dev-0 count,
# but only the train rows were concatenated here), which cut the test set
# down to 120 rows and produced a truncated prediction file.
x_test = x_test.iloc[:len(x_test) - len(df_train)]
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
115 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
116 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
117 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
118 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
119 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

120 rows × 73 columns

# Run the trained network on the encoded test features.
pred = model.predict(x_test)
4/4 [==============================] - 0s 3ms/step
# Predict on the encoded test features and write one prediction per line to
# the submission file, without a header row or an index column.
pred = model.predict(x_test)
out = pd.DataFrame(pred)
out.to_csv('test-A/out.tsv', sep='\t', index=False, header=False)
4/4 [==============================] - 0s 3ms/step