precipitation-pl/solution.ipynb
2022-05-22 20:02:12 +02:00

76 KiB
Raw Blame History

# Import required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense
# Column names for the in.tsv files (Polish: station id, station name,
# dataset type, year, month).
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']

# Load the training features; names are supplied explicitly, so pandas treats
# every line of the file as data rather than reading a header row.
df = pd.read_csv('train/in.tsv', names=in_columns, sep='\t')
len(df)
8760
# Load the test-A features using the same column names as the training file.
df_test = pd.read_csv('test-A/in.tsv', names=in_columns, sep='\t')
len(df_test)
720
# Stack train and test (train rows first) so the one-hot encoding sees the
# categories of both sets at once. The original row labels are kept, so the
# index is not unique after this step.
df = pd.concat([df,df_test])
len(df)
9480
# Discard the two columns that are not used as features, then one-hot encode
# station id, year and month.
df = df.drop(columns=['nazwa_stacji', 'typ_zbioru'])
x = pd.get_dummies(df, columns=['id_stacji', 'rok', 'miesiąc'])
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# Keep only the training rows. The encoded frame was built from train followed
# by test, so the test rows are the trailing len(df_test) rows. Deriving the
# cut-off from the frames avoids a magic row count and the `iloc[:-0]` pitfall
# (which would return an empty frame if the test set were empty).
x = x.iloc[:len(x) - len(df_test)]
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

8760 rows × 73 columns

# Load the training targets as a single 'rainfall' column.
# Presumably row-aligned with train/in.tsv (both have 8760 rows in the
# recorded run) — TODO confirm.
y = pd.read_csv('train/expected.tsv', sep='\t', names=['rainfall'])
#y = np.array(y).reshape(1,-1)
y
rainfall
0 19.4
1 43.2
2 72.2
3 25.3
4 89.3
... ...
8755 114.9
8756 101.2
8757 20.4
8758 93.2
8759 46.9

8760 rows × 1 columns

# Fully connected regression network: 73 inputs -> 128 -> 64 -> 32 -> 16 -> 1.
# Hidden layers use ReLU; the single output unit has no activation.
model = Sequential([
    Dense(128, input_dim=73, activation="relu"),
    Dense(64, activation="relu"),
    Dense(32, activation="relu"),
    Dense(16, activation="relu"),
    Dense(1),
])
# model.summary()  # uncomment to print the model summary
# Give Keras float32 inputs and targets. The cast is applied to the encoded
# feature matrix `x` (what model.fit actually receives), not to the raw `df`
# columns: `x` has already been derived from `df`, so casting `df` here would
# not reach the model, and float32 cannot represent the 9-digit station ids
# exactly anyway.
x = x.astype('float32')
y = np.asarray(y).astype('float32')
# Sanity check: print shape and dtype of the model's inputs and outputs, and
# name / input shape / dtype of every layer.
# The comprehensions are used only for their print side effect; the value the
# cell displays is the list of None returned by the last one.
[print(i.shape, i.dtype) for i in model.inputs]
[print(o.shape, o.dtype) for o in model.outputs]
[print(l.name, l.input_shape, l.dtype) for l in model.layers]
(None, 73) <dtype: 'float32'>
(None, 1) <dtype: 'float32'>
dense_13 (None, 73) float32
dense_14 (None, 128) float32
dense_15 (None, 64) float32
dense_16 (None, 32) float32
dense_17 (None, 16) float32
[None, None, None, None, None]
# Train for 60 epochs with Adam, minimising mean squared error. MSE is also
# registered as a metric, so the training log reports the same value twice.
# NOTE(review): no validation data is passed to fit, so the logged loss is
# training loss only and says nothing about generalisation.
model.compile(loss= "mean_squared_error" , optimizer="adam", metrics=["mean_squared_error"])
model.fit(x, y, epochs=60)
Epoch 1/60
274/274 [==============================] - 1s 1ms/step - loss: 1591.3160 - mean_squared_error: 1591.3160
Epoch 2/60
274/274 [==============================] - 0s 1ms/step - loss: 937.6966 - mean_squared_error: 937.6966
Epoch 3/60
274/274 [==============================] - 0s 1ms/step - loss: 885.1689 - mean_squared_error: 885.1689
Epoch 4/60
274/274 [==============================] - 0s 1ms/step - loss: 803.5897 - mean_squared_error: 803.5897
Epoch 5/60
274/274 [==============================] - 0s 1ms/step - loss: 686.9089 - mean_squared_error: 686.9089
Epoch 6/60
274/274 [==============================] - 0s 1ms/step - loss: 583.9525 - mean_squared_error: 583.9525
Epoch 7/60
274/274 [==============================] - 0s 1ms/step - loss: 518.3331 - mean_squared_error: 518.3331
Epoch 8/60
274/274 [==============================] - 0s 1ms/step - loss: 477.7906 - mean_squared_error: 477.7906
Epoch 9/60
274/274 [==============================] - 0s 1ms/step - loss: 446.8395 - mean_squared_error: 446.8395
Epoch 10/60
274/274 [==============================] - 0s 1ms/step - loss: 420.2437 - mean_squared_error: 420.2437
Epoch 11/60
274/274 [==============================] - 0s 1ms/step - loss: 401.8218 - mean_squared_error: 401.8218
Epoch 12/60
274/274 [==============================] - 0s 1ms/step - loss: 384.3383 - mean_squared_error: 384.3383
Epoch 13/60
274/274 [==============================] - 0s 1ms/step - loss: 363.4060 - mean_squared_error: 363.4060
Epoch 14/60
274/274 [==============================] - 0s 1ms/step - loss: 345.5817 - mean_squared_error: 345.5817
Epoch 15/60
274/274 [==============================] - 0s 1ms/step - loss: 332.1534 - mean_squared_error: 332.1534
Epoch 16/60
274/274 [==============================] - 0s 1ms/step - loss: 313.5759 - mean_squared_error: 313.5759
Epoch 17/60
274/274 [==============================] - 0s 1ms/step - loss: 297.7942 - mean_squared_error: 297.7942
Epoch 18/60
274/274 [==============================] - 0s 1ms/step - loss: 281.8712 - mean_squared_error: 281.8712
Epoch 19/60
274/274 [==============================] - 0s 1ms/step - loss: 268.7486 - mean_squared_error: 268.7486
Epoch 20/60
274/274 [==============================] - 0s 1ms/step - loss: 251.2501 - mean_squared_error: 251.2501
Epoch 21/60
274/274 [==============================] - 0s 1ms/step - loss: 248.3035 - mean_squared_error: 248.3035
Epoch 22/60
274/274 [==============================] - 0s 1ms/step - loss: 227.5490 - mean_squared_error: 227.5490
Epoch 23/60
274/274 [==============================] - 0s 1ms/step - loss: 212.7673 - mean_squared_error: 212.7673
Epoch 24/60
274/274 [==============================] - 0s 1ms/step - loss: 203.8420 - mean_squared_error: 203.8420
Epoch 25/60
274/274 [==============================] - 0s 1ms/step - loss: 196.8168 - mean_squared_error: 196.8168
Epoch 26/60
274/274 [==============================] - 0s 1ms/step - loss: 177.4812 - mean_squared_error: 177.4812
Epoch 27/60
274/274 [==============================] - 0s 1ms/step - loss: 168.7412 - mean_squared_error: 168.7412
Epoch 28/60
274/274 [==============================] - 0s 1ms/step - loss: 160.3402 - mean_squared_error: 160.3402
Epoch 29/60
274/274 [==============================] - 0s 1ms/step - loss: 149.0066 - mean_squared_error: 149.0066
Epoch 30/60
274/274 [==============================] - 0s 1ms/step - loss: 146.2084 - mean_squared_error: 146.2084
Epoch 31/60
274/274 [==============================] - 0s 1ms/step - loss: 139.4028 - mean_squared_error: 139.4028
Epoch 32/60
274/274 [==============================] - 0s 1ms/step - loss: 127.5936 - mean_squared_error: 127.5936
Epoch 33/60
274/274 [==============================] - 0s 1ms/step - loss: 126.8296 - mean_squared_error: 126.8296
Epoch 34/60
274/274 [==============================] - 0s 1ms/step - loss: 120.6797 - mean_squared_error: 120.6797
Epoch 35/60
274/274 [==============================] - 0s 1ms/step - loss: 109.6528 - mean_squared_error: 109.6528
Epoch 36/60
274/274 [==============================] - 0s 1ms/step - loss: 108.0870 - mean_squared_error: 108.0870
Epoch 37/60
274/274 [==============================] - 0s 1ms/step - loss: 103.3821 - mean_squared_error: 103.3821
Epoch 38/60
274/274 [==============================] - 0s 1ms/step - loss: 101.3340 - mean_squared_error: 101.3340
Epoch 39/60
274/274 [==============================] - 0s 1ms/step - loss: 97.4961 - mean_squared_error: 97.4961
Epoch 40/60
274/274 [==============================] - 0s 1ms/step - loss: 101.1986 - mean_squared_error: 101.1986
Epoch 41/60
274/274 [==============================] - 0s 1ms/step - loss: 85.8747 - mean_squared_error: 85.8747
Epoch 42/60
274/274 [==============================] - 0s 1ms/step - loss: 83.6872 - mean_squared_error: 83.6872
Epoch 43/60
274/274 [==============================] - 0s 1ms/step - loss: 83.8899 - mean_squared_error: 83.8899
Epoch 44/60
274/274 [==============================] - 0s 1ms/step - loss: 78.8383 - mean_squared_error: 78.8383
Epoch 45/60
274/274 [==============================] - 0s 1ms/step - loss: 77.0742 - mean_squared_error: 77.0742
Epoch 46/60
274/274 [==============================] - 0s 1ms/step - loss: 77.3783 - mean_squared_error: 77.3783
Epoch 47/60
274/274 [==============================] - 0s 1ms/step - loss: 74.7967 - mean_squared_error: 74.7967
Epoch 48/60
274/274 [==============================] - 0s 1ms/step - loss: 74.1861 - mean_squared_error: 74.1861
Epoch 49/60
274/274 [==============================] - 0s 1ms/step - loss: 71.5402 - mean_squared_error: 71.5402
Epoch 50/60
274/274 [==============================] - 0s 1ms/step - loss: 65.2219 - mean_squared_error: 65.2219
Epoch 51/60
274/274 [==============================] - 0s 1ms/step - loss: 68.7477 - mean_squared_error: 68.7477
Epoch 52/60
274/274 [==============================] - 0s 2ms/step - loss: 64.0499 - mean_squared_error: 64.0499
Epoch 53/60
274/274 [==============================] - 0s 1ms/step - loss: 66.3469 - mean_squared_error: 66.3469
Epoch 54/60
274/274 [==============================] - 0s 1ms/step - loss: 66.4551 - mean_squared_error: 66.4551
Epoch 55/60
274/274 [==============================] - 0s 1ms/step - loss: 62.5891 - mean_squared_error: 62.5891
Epoch 56/60
274/274 [==============================] - 0s 1ms/step - loss: 54.9064 - mean_squared_error: 54.9064
Epoch 57/60
274/274 [==============================] - 0s 1ms/step - loss: 54.1649 - mean_squared_error: 54.1649
Epoch 58/60
274/274 [==============================] - 0s 1ms/step - loss: 59.5273 - mean_squared_error: 59.5273
Epoch 59/60
274/274 [==============================] - 0s 1ms/step - loss: 50.9209 - mean_squared_error: 50.9209
Epoch 60/60
274/274 [==============================] - 0s 1ms/step - loss: 50.5574 - mean_squared_error: 50.5574
<keras.callbacks.History at 0x20e609f0cd0>
# Rebuild the test features from disk: read test-A, then append the training
# rows (test first, train second) so the one-hot encoding sees the categories
# of both sets.
x_test = pd.read_csv('test-A/in.tsv', sep='\t', names=in_columns)
#y_test = pd.read_csv('dev-0/expected.tsv', sep='\t',names=['rainfall'])
#x_test = x_test.drop(['nazwa_stacji', 'typ_zbioru'],axis=1)
df_train = pd.read_csv('train/in.tsv', names=in_columns, sep='\t')
x_test = pd.concat([x_test,df_train])
len(x_test)
9480
# Discard the two columns that are not used as features.
x_test = x_test.drop(columns=['nazwa_stacji', 'typ_zbioru'])
len(x_test)
9480
# One-hot encode station id, year and month on the combined test+train frame.
x_test = pd.get_dummies(x_test,columns = ['id_stacji','rok','miesiąc'])
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8755 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
8756 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
8757 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
8758 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
8759 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9480 rows × 73 columns

# Keep only the test rows. The encoded frame was built from test followed by
# train, so the training rows are the trailing len(df_train) rows. Deriving the
# cut-off from the frames avoids a magic row count and the `iloc[:-0]` pitfall
# (which would return an empty frame if the training set were empty).
x_test = x_test.iloc[:len(x_test) - len(df_train)]
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

720 rows × 73 columns

# Predict rainfall for every encoded test row.
pred= model.predict(x_test)
23/23 [==============================] - 0s 955us/step
# Predict on the test rows and write the predictions to test-A/out.tsv as
# tab-separated text with no header row and no index column.
# NOTE(review): this repeats the predict call from the previous cell.
pred= model.predict(x_test)
out = pd.DataFrame(pred)
out.to_csv('test-A/out.tsv',sep='\t',header=False, index=False)
23/23 [==============================] - 0s 1ms/step