76 KiB
76 KiB
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense
# Names assigned to the five columns of the input TSV files (passed via
# `names=` below, so pandas treats every row of the file as data).
in_columns = [
    'id_stacji',
    'nazwa_stacji',
    'typ_zbioru',
    'rok',
    'miesiąc',
]
# Load the training features.
df = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
df.shape[0]
8760
# Load the test-A features using the same column names as the training set.
df_test = pd.read_csv('test-A/in.tsv', sep='\t', names=in_columns)
df_test.shape[0]
720
# Stack the training rows and the test rows (test rows last) into one frame;
# each frame keeps its own row labels.
df = pd.concat((df, df_test))
df.shape[0]
9480
# Discard the two columns that are not used as features.
df = df.drop(columns=['nazwa_stacji', 'typ_zbioru'])
# One-hot encode every remaining column (station id, year, month).
x = pd.get_dummies(data=df, columns=['id_stacji', 'rok', 'miesiąc'])
x
id_stacji_249180010 | id_stacji_249190560 | id_stacji_249200370 | id_stacji_249200490 | id_stacji_249220150 | id_stacji_249220180 | id_stacji_250190160 | id_stacji_250190390 | id_stacji_250210130 | id_stacji_251170090 | ... | miesiąc_3 | miesiąc_4 | miesiąc_5 | miesiąc_6 | miesiąc_7 | miesiąc_8 | miesiąc_9 | miesiąc_10 | miesiąc_11 | miesiąc_12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
715 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
716 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
717 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
718 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
719 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
9480 rows × 73 columns
# Keep only the training rows. The test rows were appended last by the
# earlier concat, so strip exactly as many trailing rows as the test frame
# holds instead of hard-coding its length (which would silently break if the
# test file changed size). Written as an explicit stop index so an empty test
# frame keeps every row rather than producing an empty slice.
x = x.iloc[:len(x) - len(df_test)]
x
id_stacji_249180010 | id_stacji_249190560 | id_stacji_249200370 | id_stacji_249200490 | id_stacji_249220150 | id_stacji_249220180 | id_stacji_250190160 | id_stacji_250190390 | id_stacji_250210130 | id_stacji_251170090 | ... | miesiąc_3 | miesiąc_4 | miesiąc_5 | miesiąc_6 | miesiąc_7 | miesiąc_8 | miesiąc_9 | miesiąc_10 | miesiąc_11 | miesiąc_12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
8755 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
8756 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
8757 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
8758 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
8759 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
8760 rows × 73 columns
# Load the regression targets; the single column is labelled 'rainfall'.
y = pd.read_csv('train/expected.tsv', names=['rainfall'], sep='\t')
y
rainfall | |
---|---|
0 | 19.4 |
1 | 43.2 |
2 | 72.2 |
3 | 25.3 |
4 | 89.3 |
... | ... |
8755 | 114.9 |
8756 | 101.2 |
8757 | 20.4 |
8758 | 93.2 |
8759 | 46.9 |
8760 rows × 1 columns
# Define model: a fully connected regression network with four ReLU hidden
# layers (128 -> 64 -> 32 -> 16 units) and a single linear output unit.
# The input width is taken from the one-hot feature frame rather than being
# hard-coded, so the model stays in sync if the set of encoded categories
# (and therefore the number of dummy columns) changes.
n_features = x.shape[1]

model = Sequential()
model.add(Dense(128, input_dim=n_features, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(16, activation="relu"))
model.add(Dense(1))
# Cast the arrays that are actually fed to the network.
# `x` is the one-hot frame produced from `df` by get_dummies, so casting
# columns of `df` afterwards has no effect on the model input; in addition,
# float32 cannot represent the 9-digit station ids exactly, so that cast would
# only corrupt them. Those casts are therefore replaced by a cast of `x`.
# get_dummies may produce uint8 or bool columns depending on the pandas
# version; an explicit float32 cast gives Keras a uniform numeric input.
x = x.astype('float32')
y = np.asarray(y).astype('float32')
# Print the shape and dtype of the model's input and output tensors, then the
# name, input shape and dtype of every layer. Plain loops are used because
# printing is a side effect; a list comprehension here only builds throw-away
# lists of None that clutter the cell output.
for tensor in model.inputs:
    print(tensor.shape, tensor.dtype)
for tensor in model.outputs:
    print(tensor.shape, tensor.dtype)
for layer in model.layers:
    print(layer.name, layer.input_shape, layer.dtype)
(None, 73) <dtype: 'float32'> (None, 1) <dtype: 'float32'> dense_13 (None, 73) float32 dense_14 (None, 128) float32 dense_15 (None, 64) float32 dense_16 (None, 32) float32 dense_17 (None, 16) float32
[None, None, None, None, None]
# Configure training: Adam optimizer, mean squared error as both the loss and
# the reported metric.
model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mean_squared_error"],
)
# Train on the full training set for 60 epochs.
model.fit(x, y, epochs=60)
Epoch 1/60 274/274 [==============================] - 1s 1ms/step - loss: 1591.3160 - mean_squared_error: 1591.3160 Epoch 2/60 274/274 [==============================] - 0s 1ms/step - loss: 937.6966 - mean_squared_error: 937.6966 Epoch 3/60 274/274 [==============================] - 0s 1ms/step - loss: 885.1689 - mean_squared_error: 885.1689 Epoch 4/60 274/274 [==============================] - 0s 1ms/step - loss: 803.5897 - mean_squared_error: 803.5897 Epoch 5/60 274/274 [==============================] - 0s 1ms/step - loss: 686.9089 - mean_squared_error: 686.9089 Epoch 6/60 274/274 [==============================] - 0s 1ms/step - loss: 583.9525 - mean_squared_error: 583.9525 Epoch 7/60 274/274 [==============================] - 0s 1ms/step - loss: 518.3331 - mean_squared_error: 518.3331 Epoch 8/60 274/274 [==============================] - 0s 1ms/step - loss: 477.7906 - mean_squared_error: 477.7906 Epoch 9/60 274/274 [==============================] - 0s 1ms/step - loss: 446.8395 - mean_squared_error: 446.8395 Epoch 10/60 274/274 [==============================] - 0s 1ms/step - loss: 420.2437 - mean_squared_error: 420.2437 Epoch 11/60 274/274 [==============================] - 0s 1ms/step - loss: 401.8218 - mean_squared_error: 401.8218 Epoch 12/60 274/274 [==============================] - 0s 1ms/step - loss: 384.3383 - mean_squared_error: 384.3383 Epoch 13/60 274/274 [==============================] - 0s 1ms/step - loss: 363.4060 - mean_squared_error: 363.4060 Epoch 14/60 274/274 [==============================] - 0s 1ms/step - loss: 345.5817 - mean_squared_error: 345.5817 Epoch 15/60 274/274 [==============================] - 0s 1ms/step - loss: 332.1534 - mean_squared_error: 332.1534 Epoch 16/60 274/274 [==============================] - 0s 1ms/step - loss: 313.5759 - mean_squared_error: 313.5759 Epoch 17/60 274/274 [==============================] - 0s 1ms/step - loss: 297.7942 - mean_squared_error: 297.7942 Epoch 18/60 274/274 
[==============================] - 0s 1ms/step - loss: 281.8712 - mean_squared_error: 281.8712 Epoch 19/60 274/274 [==============================] - 0s 1ms/step - loss: 268.7486 - mean_squared_error: 268.7486 Epoch 20/60 274/274 [==============================] - 0s 1ms/step - loss: 251.2501 - mean_squared_error: 251.2501 Epoch 21/60 274/274 [==============================] - 0s 1ms/step - loss: 248.3035 - mean_squared_error: 248.3035 Epoch 22/60 274/274 [==============================] - 0s 1ms/step - loss: 227.5490 - mean_squared_error: 227.5490 Epoch 23/60 274/274 [==============================] - 0s 1ms/step - loss: 212.7673 - mean_squared_error: 212.7673 Epoch 24/60 274/274 [==============================] - 0s 1ms/step - loss: 203.8420 - mean_squared_error: 203.8420 Epoch 25/60 274/274 [==============================] - 0s 1ms/step - loss: 196.8168 - mean_squared_error: 196.8168 Epoch 26/60 274/274 [==============================] - 0s 1ms/step - loss: 177.4812 - mean_squared_error: 177.4812 Epoch 27/60 274/274 [==============================] - 0s 1ms/step - loss: 168.7412 - mean_squared_error: 168.7412 Epoch 28/60 274/274 [==============================] - 0s 1ms/step - loss: 160.3402 - mean_squared_error: 160.3402 Epoch 29/60 274/274 [==============================] - 0s 1ms/step - loss: 149.0066 - mean_squared_error: 149.0066 Epoch 30/60 274/274 [==============================] - 0s 1ms/step - loss: 146.2084 - mean_squared_error: 146.2084 Epoch 31/60 274/274 [==============================] - 0s 1ms/step - loss: 139.4028 - mean_squared_error: 139.4028 Epoch 32/60 274/274 [==============================] - 0s 1ms/step - loss: 127.5936 - mean_squared_error: 127.5936 Epoch 33/60 274/274 [==============================] - 0s 1ms/step - loss: 126.8296 - mean_squared_error: 126.8296 Epoch 34/60 274/274 [==============================] - 0s 1ms/step - loss: 120.6797 - mean_squared_error: 120.6797 Epoch 35/60 274/274 [==============================] - 0s 
1ms/step - loss: 109.6528 - mean_squared_error: 109.6528 Epoch 36/60 274/274 [==============================] - 0s 1ms/step - loss: 108.0870 - mean_squared_error: 108.0870 Epoch 37/60 274/274 [==============================] - 0s 1ms/step - loss: 103.3821 - mean_squared_error: 103.3821 Epoch 38/60 274/274 [==============================] - 0s 1ms/step - loss: 101.3340 - mean_squared_error: 101.3340 Epoch 39/60 274/274 [==============================] - 0s 1ms/step - loss: 97.4961 - mean_squared_error: 97.4961 Epoch 40/60 274/274 [==============================] - 0s 1ms/step - loss: 101.1986 - mean_squared_error: 101.1986 Epoch 41/60 274/274 [==============================] - 0s 1ms/step - loss: 85.8747 - mean_squared_error: 85.8747 Epoch 42/60 274/274 [==============================] - 0s 1ms/step - loss: 83.6872 - mean_squared_error: 83.6872 Epoch 43/60 274/274 [==============================] - 0s 1ms/step - loss: 83.8899 - mean_squared_error: 83.8899 Epoch 44/60 274/274 [==============================] - 0s 1ms/step - loss: 78.8383 - mean_squared_error: 78.8383 Epoch 45/60 274/274 [==============================] - 0s 1ms/step - loss: 77.0742 - mean_squared_error: 77.0742 Epoch 46/60 274/274 [==============================] - 0s 1ms/step - loss: 77.3783 - mean_squared_error: 77.3783 Epoch 47/60 274/274 [==============================] - 0s 1ms/step - loss: 74.7967 - mean_squared_error: 74.7967 Epoch 48/60 274/274 [==============================] - 0s 1ms/step - loss: 74.1861 - mean_squared_error: 74.1861 Epoch 49/60 274/274 [==============================] - 0s 1ms/step - loss: 71.5402 - mean_squared_error: 71.5402 Epoch 50/60 274/274 [==============================] - 0s 1ms/step - loss: 65.2219 - mean_squared_error: 65.2219 Epoch 51/60 274/274 [==============================] - 0s 1ms/step - loss: 68.7477 - mean_squared_error: 68.7477 Epoch 52/60 274/274 [==============================] - 0s 2ms/step - loss: 64.0499 - mean_squared_error: 64.0499 Epoch 53/60 
274/274 [==============================] - 0s 1ms/step - loss: 66.3469 - mean_squared_error: 66.3469 Epoch 54/60 274/274 [==============================] - 0s 1ms/step - loss: 66.4551 - mean_squared_error: 66.4551 Epoch 55/60 274/274 [==============================] - 0s 1ms/step - loss: 62.5891 - mean_squared_error: 62.5891 Epoch 56/60 274/274 [==============================] - 0s 1ms/step - loss: 54.9064 - mean_squared_error: 54.9064 Epoch 57/60 274/274 [==============================] - 0s 1ms/step - loss: 54.1649 - mean_squared_error: 54.1649 Epoch 58/60 274/274 [==============================] - 0s 1ms/step - loss: 59.5273 - mean_squared_error: 59.5273 Epoch 59/60 274/274 [==============================] - 0s 1ms/step - loss: 50.9209 - mean_squared_error: 50.9209 Epoch 60/60 274/274 [==============================] - 0s 1ms/step - loss: 50.5574 - mean_squared_error: 50.5574
<keras.callbacks.History at 0x20e609f0cd0>
# Re-read the test-A and training inputs and stack them, test rows first and
# training rows last.
x_test = pd.read_csv('test-A/in.tsv', names=in_columns, sep='\t')
df_train = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
x_test = pd.concat((x_test, df_train))
x_test.shape[0]
9480
# Remove the two columns that are not used as features.
x_test = x_test.drop(columns=['nazwa_stacji', 'typ_zbioru'])
x_test.shape[0]
9480
# One-hot encode the station id, year and month columns.
x_test = pd.get_dummies(data=x_test, columns=['id_stacji', 'rok', 'miesiąc'])
x_test
id_stacji_249180010 | id_stacji_249190560 | id_stacji_249200370 | id_stacji_249200490 | id_stacji_249220150 | id_stacji_249220180 | id_stacji_250190160 | id_stacji_250190390 | id_stacji_250210130 | id_stacji_251170090 | ... | miesiąc_3 | miesiąc_4 | miesiąc_5 | miesiąc_6 | miesiąc_7 | miesiąc_8 | miesiąc_9 | miesiąc_10 | miesiąc_11 | miesiąc_12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
8755 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
8756 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
8757 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
8758 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
8759 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
9480 rows × 73 columns
# Keep only the test rows. The training rows were appended last by the
# earlier concat, so strip exactly as many trailing rows as the training
# frame holds instead of hard-coding its length. Written as an explicit stop
# index so an empty training frame keeps every row rather than producing an
# empty slice.
x_test = x_test.iloc[:len(x_test) - len(df_train)]
x_test
id_stacji_249180010 | id_stacji_249190560 | id_stacji_249200370 | id_stacji_249200490 | id_stacji_249220150 | id_stacji_249220180 | id_stacji_250190160 | id_stacji_250190390 | id_stacji_250210130 | id_stacji_251170090 | ... | miesiąc_3 | miesiąc_4 | miesiąc_5 | miesiąc_6 | miesiąc_7 | miesiąc_8 | miesiąc_9 | miesiąc_10 | miesiąc_11 | miesiąc_12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
715 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
716 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
717 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
718 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
719 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
720 rows × 73 columns
# Run the trained network on the encoded test-A features.
pred = model.predict(x_test)
23/23 [==============================] - 0s 955us/step
# Predict for the test-A features and write the predictions as a
# tab-separated file with neither a header row nor an index column.
pred = model.predict(x_test)
out = pd.DataFrame(data=pred)
out.to_csv('test-A/out.tsv', index=False, header=False, sep='\t')
23/23 [==============================] - 0s 1ms/step