precipitation-pl/solution.ipynb
2022-05-22 20:32:15 +02:00

78 KiB
Raw Blame History

# Import required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense
# Column names assigned to the in.tsv files when they are loaded.
in_columns = [
    'id_stacji',
    'nazwa_stacji',
    'typ_zbioru',
    'rok',
    'miesiąc',
]

# Load the training features, labelling the columns with in_columns.
df = pd.read_csv(
    'train/in.tsv',
    sep='\t',
    names=in_columns,
)
len(df)
8760
# Load the test-A features with the same column labels as the training set.
df_test = pd.read_csv(
    'test-A/in.tsv',
    sep='\t',
    names=in_columns,
)
len(df_test)
720
# Stack the training rows and the test rows (training rows first) into one
# frame so that both are one-hot encoded together.
df = pd.concat(objs=[df, df_test])
len(df)
9480
# Station name and dataset type are dropped before encoding.
df = df.drop(columns=['nazwa_stacji', 'typ_zbioru'])
# One-hot encode the three remaining columns (station id, year, month).
x = pd.get_dummies(data=df, columns=['id_stacji', 'rok', 'miesiąc'])
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# Keep only the training rows. The test rows were appended after the training
# rows, so drop exactly as many trailing rows as df_test contains instead of
# relying on a hard-coded 720. Computing the stop index explicitly also stays
# correct if df_test is ever empty (a bare [:-0] would return no rows).
x = x.iloc[:len(x) - len(df_test)]
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

8760 rows × 73 columns

# Load the training targets as a one-column frame named 'rainfall'.
y = pd.read_csv(
    'train/expected.tsv',
    names=['rainfall'],
    sep='\t',
)
y
rainfall
0 19.4
1 43.2
2 72.2
3 25.3
4 89.3
... ...
8755 114.9
8756 101.2
8757 20.4
8758 93.2
8759 46.9

8760 rows × 1 columns

# Define model: a stack of fully connected ReLU layers whose width halves from
# 1024 down to 16, followed by a single linear output unit for regression.
model = Sequential()
# The input width is taken from the one-hot feature frame rather than being
# hard-coded, so the network keeps matching x if the set of stations, years
# or months in the data changes.
model.add(Dense(1024, input_dim=x.shape[1], activation="relu"))
for units in (512, 256, 128, 64, 32, 16):
    model.add(Dense(units, activation="relu"))
model.add(Dense(1))
# Call model.summary() here to print the layer-by-layer summary if needed.
# Cast the arrays the network actually consumes to float32.
# The model is fitted on the one-hot frame x, not on df, so casting the raw
# df columns had no effect on training; float32 also cannot represent every
# 9-digit station id exactly (e.g. 249180010), so that cast only degraded df.
x = x.astype('float32')
y = np.asarray(y).astype('float32')
# Print the shape and dtype of the model's inputs and outputs, then the name,
# input shape and dtype of every layer. Plain loops are used because the
# printing is a side effect; list comprehensions would only build throwaway
# lists of None (which a notebook then echoes as the cell result).
for tensor in model.inputs:
    print(tensor.shape, tensor.dtype)
for tensor in model.outputs:
    print(tensor.shape, tensor.dtype)
for layer in model.layers:
    print(layer.name, layer.input_shape, layer.dtype)
(None, 73) <dtype: 'float32'>
(None, 1) <dtype: 'float32'>
dense_103 (None, 73) float32
dense_104 (None, 1024) float32
dense_105 (None, 512) float32
dense_106 (None, 256) float32
dense_107 (None, 128) float32
dense_108 (None, 64) float32
dense_109 (None, 32) float32
dense_110 (None, 16) float32
[None, None, None, None, None, None, None, None]
# Mean squared error is both the optimised loss and the reported metric.
model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mean_squared_error"],
)
# Fit on x / y for 80 epochs; no validation data is passed.
model.fit(x=x, y=y, epochs=80)
Epoch 1/80
274/274 [==============================] - 2s 6ms/step - loss: 1148.3020 - mean_squared_error: 1148.3020
Epoch 2/80
274/274 [==============================] - 2s 6ms/step - loss: 763.9616 - mean_squared_error: 763.9616
Epoch 3/80
274/274 [==============================] - 2s 6ms/step - loss: 545.3013 - mean_squared_error: 545.3013
Epoch 4/80
274/274 [==============================] - 2s 6ms/step - loss: 461.5141 - mean_squared_error: 461.5141
Epoch 5/80
274/274 [==============================] - 2s 6ms/step - loss: 422.0508 - mean_squared_error: 422.0508
Epoch 6/80
274/274 [==============================] - 2s 6ms/step - loss: 391.7643 - mean_squared_error: 391.7643
Epoch 7/80
274/274 [==============================] - 2s 6ms/step - loss: 331.7844 - mean_squared_error: 331.7844
Epoch 8/80
274/274 [==============================] - 2s 6ms/step - loss: 301.6153 - mean_squared_error: 301.6153
Epoch 9/80
274/274 [==============================] - 2s 6ms/step - loss: 260.0949 - mean_squared_error: 260.0949
Epoch 10/80
274/274 [==============================] - 2s 6ms/step - loss: 224.1433 - mean_squared_error: 224.1433
Epoch 11/80
274/274 [==============================] - 2s 6ms/step - loss: 201.2247 - mean_squared_error: 201.2247
Epoch 12/80
274/274 [==============================] - 1s 5ms/step - loss: 170.9166 - mean_squared_error: 170.9166
Epoch 13/80
274/274 [==============================] - 1s 5ms/step - loss: 139.1919 - mean_squared_error: 139.1919
Epoch 14/80
274/274 [==============================] - 1s 5ms/step - loss: 115.9581 - mean_squared_error: 115.9581
Epoch 15/80
274/274 [==============================] - 1s 5ms/step - loss: 103.9778 - mean_squared_error: 103.9778
Epoch 16/80
274/274 [==============================] - 1s 5ms/step - loss: 88.2708 - mean_squared_error: 88.2708
Epoch 17/80
274/274 [==============================] - 2s 6ms/step - loss: 72.0225 - mean_squared_error: 72.0225
Epoch 18/80
274/274 [==============================] - 2s 6ms/step - loss: 63.5714 - mean_squared_error: 63.5714
Epoch 19/80
274/274 [==============================] - 2s 6ms/step - loss: 56.0757 - mean_squared_error: 56.0757
Epoch 20/80
274/274 [==============================] - 2s 6ms/step - loss: 52.9535 - mean_squared_error: 52.9535
Epoch 21/80
274/274 [==============================] - 2s 6ms/step - loss: 50.0143 - mean_squared_error: 50.0143
Epoch 22/80
274/274 [==============================] - 2s 7ms/step - loss: 41.2315 - mean_squared_error: 41.2315
Epoch 23/80
274/274 [==============================] - 2s 6ms/step - loss: 39.8365 - mean_squared_error: 39.8365
Epoch 24/80
274/274 [==============================] - 2s 6ms/step - loss: 41.5614 - mean_squared_error: 41.5614
Epoch 25/80
274/274 [==============================] - 2s 6ms/step - loss: 42.3862 - mean_squared_error: 42.3862
Epoch 26/80
274/274 [==============================] - 2s 6ms/step - loss: 38.0177 - mean_squared_error: 38.0177
Epoch 27/80
274/274 [==============================] - 2s 6ms/step - loss: 36.0990 - mean_squared_error: 36.0990
Epoch 28/80
274/274 [==============================] - 2s 6ms/step - loss: 41.5000 - mean_squared_error: 41.5000
Epoch 29/80
274/274 [==============================] - 2s 6ms/step - loss: 37.8813 - mean_squared_error: 37.8813
Epoch 30/80
274/274 [==============================] - 2s 6ms/step - loss: 37.9894 - mean_squared_error: 37.9894
Epoch 31/80
274/274 [==============================] - 2s 6ms/step - loss: 31.0013 - mean_squared_error: 31.0013
Epoch 32/80
274/274 [==============================] - 2s 6ms/step - loss: 24.9764 - mean_squared_error: 24.9764
Epoch 33/80
274/274 [==============================] - 2s 6ms/step - loss: 31.9433 - mean_squared_error: 31.9433
Epoch 34/80
274/274 [==============================] - 2s 6ms/step - loss: 31.7013 - mean_squared_error: 31.7013
Epoch 35/80
274/274 [==============================] - 2s 6ms/step - loss: 29.5324 - mean_squared_error: 29.5324
Epoch 36/80
274/274 [==============================] - 2s 5ms/step - loss: 32.4733 - mean_squared_error: 32.4733
Epoch 37/80
274/274 [==============================] - 2s 6ms/step - loss: 23.7742 - mean_squared_error: 23.7742
Epoch 38/80
274/274 [==============================] - 2s 6ms/step - loss: 27.0307 - mean_squared_error: 27.0307
Epoch 39/80
274/274 [==============================] - 2s 6ms/step - loss: 28.7847 - mean_squared_error: 28.7847
Epoch 40/80
274/274 [==============================] - 2s 6ms/step - loss: 31.0826 - mean_squared_error: 31.0826
Epoch 41/80
274/274 [==============================] - 2s 6ms/step - loss: 26.5976 - mean_squared_error: 26.5976
Epoch 42/80
274/274 [==============================] - 2s 6ms/step - loss: 24.3899 - mean_squared_error: 24.3899
Epoch 43/80
274/274 [==============================] - 2s 7ms/step - loss: 20.7662 - mean_squared_error: 20.7662
Epoch 44/80
274/274 [==============================] - 2s 6ms/step - loss: 19.0226 - mean_squared_error: 19.0226
Epoch 45/80
274/274 [==============================] - 2s 6ms/step - loss: 19.3724 - mean_squared_error: 19.3724
Epoch 46/80
274/274 [==============================] - 2s 6ms/step - loss: 24.7011 - mean_squared_error: 24.7011
Epoch 47/80
274/274 [==============================] - 2s 6ms/step - loss: 25.1954 - mean_squared_error: 25.1954
Epoch 48/80
274/274 [==============================] - 2s 6ms/step - loss: 29.5989 - mean_squared_error: 29.5989
Epoch 49/80
274/274 [==============================] - 2s 6ms/step - loss: 22.7573 - mean_squared_error: 22.7573
Epoch 50/80
274/274 [==============================] - 2s 6ms/step - loss: 23.1566 - mean_squared_error: 23.1566
Epoch 51/80
274/274 [==============================] - 2s 6ms/step - loss: 18.3705 - mean_squared_error: 18.3705
Epoch 52/80
274/274 [==============================] - 2s 6ms/step - loss: 16.7029 - mean_squared_error: 16.7029
Epoch 53/80
274/274 [==============================] - 2s 6ms/step - loss: 16.9602 - mean_squared_error: 16.9602
Epoch 54/80
274/274 [==============================] - 2s 6ms/step - loss: 21.2996 - mean_squared_error: 21.2996
Epoch 55/80
274/274 [==============================] - 2s 6ms/step - loss: 19.7800 - mean_squared_error: 19.7800
Epoch 56/80
274/274 [==============================] - 2s 6ms/step - loss: 19.7060 - mean_squared_error: 19.7060
Epoch 57/80
274/274 [==============================] - 2s 6ms/step - loss: 20.6657 - mean_squared_error: 20.6657
Epoch 58/80
274/274 [==============================] - 2s 6ms/step - loss: 19.9114 - mean_squared_error: 19.9114
Epoch 59/80
274/274 [==============================] - 2s 6ms/step - loss: 15.5104 - mean_squared_error: 15.5104
Epoch 60/80
274/274 [==============================] - 2s 6ms/step - loss: 14.6696 - mean_squared_error: 14.6696
Epoch 61/80
274/274 [==============================] - 2s 6ms/step - loss: 15.2659 - mean_squared_error: 15.2659
Epoch 62/80
274/274 [==============================] - 2s 6ms/step - loss: 18.6857 - mean_squared_error: 18.6857
Epoch 63/80
274/274 [==============================] - 2s 7ms/step - loss: 19.9120 - mean_squared_error: 19.9120
Epoch 64/80
274/274 [==============================] - 2s 6ms/step - loss: 22.7588 - mean_squared_error: 22.7588
Epoch 65/80
274/274 [==============================] - 2s 6ms/step - loss: 18.3624 - mean_squared_error: 18.3624
Epoch 66/80
274/274 [==============================] - 2s 6ms/step - loss: 19.8439 - mean_squared_error: 19.8439
Epoch 67/80
274/274 [==============================] - 2s 6ms/step - loss: 15.1439 - mean_squared_error: 15.1439
Epoch 68/80
274/274 [==============================] - 2s 6ms/step - loss: 14.6623 - mean_squared_error: 14.6623
Epoch 69/80
274/274 [==============================] - 2s 6ms/step - loss: 14.0223 - mean_squared_error: 14.0223
Epoch 70/80
274/274 [==============================] - 2s 6ms/step - loss: 20.7763 - mean_squared_error: 20.7763
Epoch 71/80
274/274 [==============================] - 2s 6ms/step - loss: 19.1875 - mean_squared_error: 19.1875
Epoch 72/80
274/274 [==============================] - 2s 6ms/step - loss: 14.1436 - mean_squared_error: 14.1436
Epoch 73/80
274/274 [==============================] - 2s 7ms/step - loss: 14.2062 - mean_squared_error: 14.2062
Epoch 74/80
274/274 [==============================] - 2s 7ms/step - loss: 10.8038 - mean_squared_error: 10.8038
Epoch 75/80
274/274 [==============================] - 2s 6ms/step - loss: 10.9576 - mean_squared_error: 10.9576
Epoch 76/80
274/274 [==============================] - 2s 6ms/step - loss: 12.1856 - mean_squared_error: 12.1856
Epoch 77/80
274/274 [==============================] - 2s 7ms/step - loss: 12.5274 - mean_squared_error: 12.5274
Epoch 78/80
274/274 [==============================] - 2s 6ms/step - loss: 14.9551 - mean_squared_error: 14.9551
Epoch 79/80
274/274 [==============================] - 2s 6ms/step - loss: 15.9294 - mean_squared_error: 15.9294
Epoch 80/80
274/274 [==============================] - 2s 6ms/step - loss: 16.6070 - mean_squared_error: 16.6070
<keras.callbacks.History at 0x20e7057ed30>
# Reload the test-A and training features and stack them (test rows first,
# training rows after) so the one-hot encoding that follows is computed over
# both sets together.
df_train = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
x_test = pd.read_csv('test-A/in.tsv', names=in_columns, sep='\t')
x_test = pd.concat(objs=[x_test, df_train])
len(x_test)
9480
# Drop the station name and dataset type columns before encoding.
x_test = x_test.drop(columns=['nazwa_stacji', 'typ_zbioru'])
len(x_test)
9480
# One-hot encode station id, year and month over the combined test+train rows.
x_test = pd.get_dummies(
    data=x_test,
    columns=['id_stacji', 'rok', 'miesiąc'],
)
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# Keep only the test rows. The training rows were appended after the test
# rows, so drop exactly as many trailing rows as df_train contains instead of
# relying on a hard-coded 8760. Computing the stop index explicitly also stays
# correct if df_train is ever empty (a bare [:-0] would return no rows).
x_test = x_test.iloc[:len(x_test) - len(df_train)]
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

720 rows × 73 columns

# Run the trained model on the encoded test-A rows.
pred = model.predict(x=x_test)
23/23 [==============================] - 0s 2ms/step
# pred already holds the test-A predictions from the preceding predict call,
# so it is written out directly instead of running inference a second time.
# The file is tab-separated with no header row and no index column.
out = pd.DataFrame(pred)
out.to_csv('test-A/out.tsv', sep='\t', header=False, index=False)
23/23 [==============================] - 0s 2ms/step