precipitation-pl/solution.ipynb
2022-05-23 15:41:22 +02:00

76 KiB
Raw Blame History

# Import required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense
# Column layout applied to the in.tsv files when they are read.
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']

# Load the train and dev-0 feature tables and stack them (train rows first).
df = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
df2 = pd.read_csv('dev-0/in.tsv', sep='\t', names=in_columns)
df = pd.concat((df, df2))
len(df)
9360
# Load the test-A feature table with the same column layout.
df_test = pd.read_csv('test-A/in.tsv', sep='\t', names=in_columns)
len(df_test)
720
# Append the test rows after the rows already in df, so that df holds every
# split in one frame (test rows last).
df = pd.concat((df, df_test))
len(df)
10080
# Discard the two columns that are not used as features, then one-hot encode
# the remaining station id, year and month columns.
df = df.drop(columns=['nazwa_stacji', 'typ_zbioru'])
x = pd.get_dummies(df, columns=['id_stacji', 'rok', 'miesiąc'])
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

10080 rows × 73 columns

# Keep only the rows used for fitting. The test-A rows were appended last, so
# they occupy the tail of the frame; derive their count from df_test instead
# of hard-coding it. Written as an explicit end index so that an empty
# df_test keeps every row (a plain `[:-0]` slice would return nothing).
x = x.iloc[:len(x) - len(df_test)]
x
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
595 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
596 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
597 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
598 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
599 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

9360 rows × 73 columns

# Load the targets for train and dev-0 and stack them train-first, the same
# order in which the feature tables are concatenated.
y = pd.read_csv('train/expected.tsv', names=['rainfall'], sep='\t')
y2 = pd.read_csv('dev-0/expected.tsv', names=['rainfall'], sep='\t')
y = pd.concat((y, y2))
# Define model: a fully connected network whose hidden ReLU layers halve in
# width from 2048 down to 16, followed by a single-unit output layer with no
# activation.
model = Sequential()
# Take the input width from the feature matrix instead of hard-coding it, so
# the first layer keeps matching x if the set of one-hot columns changes.
model.add(Dense(2048, input_dim=x.shape[1], activation="relu"))
for units in (1024, 512, 256, 128, 64, 32, 16):
    model.add(Dense(units, activation="relu"))
model.add(Dense(1))
#model.summary() #Print model Summary
# NOTE(review): x was already built from df by get_dummies, so in the code
# visible here these casts do not change what the model is trained on.
# float32 also cannot hold the 9-digit station ids exactly -- confirm whether
# these three lines are still needed.
df['id_stacji'] = np.asarray(df['id_stacji']).astype('float32')
df['rok'] = np.asarray(df['rok']).astype('float32')
df['miesiąc'] = np.asarray(df['miesiąc']).astype('float32')
# Convert the targets to a float32 array.
y = np.asarray(y).astype('float32')

# Print the shape and dtype of the model's inputs, outputs and layers. Plain
# loops are used because these are side effects only; a list comprehension
# here would build (and echo) a throwaway list of None values.
for model_input in model.inputs:
    print(model_input.shape, model_input.dtype)
for model_output in model.outputs:
    print(model_output.shape, model_output.dtype)
for layer in model.layers:
    print(layer.name, layer.input_shape, layer.dtype)
(None, 73) <dtype: 'float32'>
(None, 1) <dtype: 'float32'>
dense_44 (None, 73) float32
dense_45 (None, 2048) float32
dense_46 (None, 1024) float32
dense_47 (None, 512) float32
dense_48 (None, 256) float32
dense_49 (None, 128) float32
dense_50 (None, 64) float32
dense_51 (None, 32) float32
dense_52 (None, 16) float32
[None, None, None, None, None, None, None, None, None]
# Configure training: Adam optimizer, mean squared error as both the loss
# and the reported metric.
model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mean_squared_error"],
)
# Fit for 80 epochs on the feature matrix x and targets y.
model.fit(x, y, epochs=80)
Epoch 1/80
293/293 [==============================] - 5s 17ms/step - loss: 1108.6758 - mean_squared_error: 1108.6758
Epoch 2/80
293/293 [==============================] - 5s 17ms/step - loss: 671.0632 - mean_squared_error: 671.0632
Epoch 3/80
293/293 [==============================] - 5s 17ms/step - loss: 536.2025 - mean_squared_error: 536.2025
Epoch 4/80
293/293 [==============================] - 5s 16ms/step - loss: 457.3617 - mean_squared_error: 457.3617
Epoch 5/80
293/293 [==============================] - 5s 16ms/step - loss: 406.1862 - mean_squared_error: 406.1862
Epoch 6/80
293/293 [==============================] - 5s 17ms/step - loss: 369.4316 - mean_squared_error: 369.4316
Epoch 7/80
293/293 [==============================] - 5s 17ms/step - loss: 312.5139 - mean_squared_error: 312.5139
Epoch 8/80
293/293 [==============================] - 5s 17ms/step - loss: 270.2833 - mean_squared_error: 270.2833
Epoch 9/80
293/293 [==============================] - 5s 17ms/step - loss: 223.4037 - mean_squared_error: 223.4037
Epoch 10/80
293/293 [==============================] - 5s 17ms/step - loss: 179.4202 - mean_squared_error: 179.4202
Epoch 11/80
293/293 [==============================] - 5s 17ms/step - loss: 143.8777 - mean_squared_error: 143.8777
Epoch 12/80
293/293 [==============================] - 5s 18ms/step - loss: 135.4522 - mean_squared_error: 135.4522
Epoch 13/80
293/293 [==============================] - 5s 18ms/step - loss: 109.2838 - mean_squared_error: 109.2838
Epoch 14/80
293/293 [==============================] - 5s 17ms/step - loss: 88.6090 - mean_squared_error: 88.6090
Epoch 15/80
293/293 [==============================] - 5s 17ms/step - loss: 69.3139 - mean_squared_error: 69.3139
Epoch 16/80
293/293 [==============================] - 5s 16ms/step - loss: 67.1195 - mean_squared_error: 67.1195
Epoch 17/80
293/293 [==============================] - 5s 16ms/step - loss: 59.6054 - mean_squared_error: 59.6054
Epoch 18/80
293/293 [==============================] - 5s 17ms/step - loss: 50.4958 - mean_squared_error: 50.4958
Epoch 19/80
293/293 [==============================] - 5s 17ms/step - loss: 41.2413 - mean_squared_error: 41.2413
Epoch 20/80
293/293 [==============================] - 5s 17ms/step - loss: 35.0757 - mean_squared_error: 35.0757
Epoch 21/80
293/293 [==============================] - 5s 17ms/step - loss: 43.3807 - mean_squared_error: 43.3807
Epoch 22/80
293/293 [==============================] - 5s 17ms/step - loss: 48.1348 - mean_squared_error: 48.1348
Epoch 23/80
293/293 [==============================] - 5s 18ms/step - loss: 52.9108 - mean_squared_error: 52.9108
Epoch 24/80
293/293 [==============================] - 5s 17ms/step - loss: 40.8023 - mean_squared_error: 40.8023
Epoch 25/80
293/293 [==============================] - 5s 16ms/step - loss: 35.4987 - mean_squared_error: 35.4987
Epoch 26/80
293/293 [==============================] - 5s 16ms/step - loss: 35.0609 - mean_squared_error: 35.0609
Epoch 27/80
293/293 [==============================] - 5s 16ms/step - loss: 39.9937 - mean_squared_error: 39.9937
Epoch 28/80
293/293 [==============================] - 5s 17ms/step - loss: 29.5927 - mean_squared_error: 29.5927
Epoch 29/80
293/293 [==============================] - 5s 17ms/step - loss: 33.4916 - mean_squared_error: 33.4916
Epoch 30/80
293/293 [==============================] - 5s 17ms/step - loss: 37.4889 - mean_squared_error: 37.4889
Epoch 31/80
293/293 [==============================] - 5s 17ms/step - loss: 36.7416 - mean_squared_error: 36.7416
Epoch 32/80
293/293 [==============================] - 5s 16ms/step - loss: 34.1706 - mean_squared_error: 34.1706
Epoch 33/80
293/293 [==============================] - 5s 16ms/step - loss: 29.5588 - mean_squared_error: 29.5588
Epoch 34/80
293/293 [==============================] - 5s 16ms/step - loss: 35.8357 - mean_squared_error: 35.8357
Epoch 35/80
293/293 [==============================] - 5s 16ms/step - loss: 33.4907 - mean_squared_error: 33.4907
Epoch 36/80
293/293 [==============================] - 5s 16ms/step - loss: 26.6265 - mean_squared_error: 26.6265
Epoch 37/80
293/293 [==============================] - 5s 16ms/step - loss: 23.5669 - mean_squared_error: 23.5669
Epoch 38/80
293/293 [==============================] - 5s 16ms/step - loss: 20.1027 - mean_squared_error: 20.1027
Epoch 39/80
293/293 [==============================] - 5s 16ms/step - loss: 19.0630 - mean_squared_error: 19.0630
Epoch 40/80
293/293 [==============================] - 5s 16ms/step - loss: 22.2653 - mean_squared_error: 22.2653
Epoch 41/80
293/293 [==============================] - 5s 16ms/step - loss: 28.3499 - mean_squared_error: 28.3499
Epoch 42/80
293/293 [==============================] - 5s 16ms/step - loss: 30.2943 - mean_squared_error: 30.2943
Epoch 43/80
293/293 [==============================] - 5s 16ms/step - loss: 30.8464 - mean_squared_error: 30.8464
Epoch 44/80
293/293 [==============================] - 5s 18ms/step - loss: 25.8581 - mean_squared_error: 25.8581
Epoch 45/80
293/293 [==============================] - 5s 17ms/step - loss: 22.0973 - mean_squared_error: 22.0973
Epoch 46/80
293/293 [==============================] - 5s 17ms/step - loss: 20.3286 - mean_squared_error: 20.3286
Epoch 47/80
293/293 [==============================] - 5s 16ms/step - loss: 20.7386 - mean_squared_error: 20.7386
Epoch 48/80
293/293 [==============================] - 5s 16ms/step - loss: 20.1520 - mean_squared_error: 20.1520
Epoch 49/80
293/293 [==============================] - 5s 16ms/step - loss: 21.0666 - mean_squared_error: 21.0666
Epoch 50/80
293/293 [==============================] - 5s 16ms/step - loss: 20.2202 - mean_squared_error: 20.2202
Epoch 51/80
293/293 [==============================] - 5s 16ms/step - loss: 20.7954 - mean_squared_error: 20.7954
Epoch 52/80
293/293 [==============================] - 5s 16ms/step - loss: 16.0701 - mean_squared_error: 16.0701
Epoch 53/80
293/293 [==============================] - 5s 16ms/step - loss: 16.0172 - mean_squared_error: 16.0172
Epoch 54/80
293/293 [==============================] - 5s 16ms/step - loss: 16.2924 - mean_squared_error: 16.2924
Epoch 55/80
293/293 [==============================] - 5s 16ms/step - loss: 16.6287 - mean_squared_error: 16.6287
Epoch 56/80
293/293 [==============================] - 5s 16ms/step - loss: 16.3168 - mean_squared_error: 16.3168
Epoch 57/80
293/293 [==============================] - 5s 16ms/step - loss: 18.8847 - mean_squared_error: 18.8847
Epoch 58/80
293/293 [==============================] - 5s 16ms/step - loss: 20.1943 - mean_squared_error: 20.1943
Epoch 59/80
293/293 [==============================] - 5s 16ms/step - loss: 22.6101 - mean_squared_error: 22.6101
Epoch 60/80
293/293 [==============================] - 5s 16ms/step - loss: 19.8736 - mean_squared_error: 19.8736
Epoch 61/80
293/293 [==============================] - 5s 16ms/step - loss: 19.8947 - mean_squared_error: 19.8947
Epoch 62/80
293/293 [==============================] - 5s 16ms/step - loss: 19.0540 - mean_squared_error: 19.0540
Epoch 63/80
293/293 [==============================] - 5s 16ms/step - loss: 18.1842 - mean_squared_error: 18.1842
Epoch 64/80
293/293 [==============================] - 5s 16ms/step - loss: 15.4725 - mean_squared_error: 15.4725
Epoch 65/80
293/293 [==============================] - 5s 16ms/step - loss: 16.2298 - mean_squared_error: 16.2298
Epoch 66/80
293/293 [==============================] - 5s 16ms/step - loss: 13.2303 - mean_squared_error: 13.2303
Epoch 67/80
293/293 [==============================] - 5s 16ms/step - loss: 14.2212 - mean_squared_error: 14.2212
Epoch 68/80
293/293 [==============================] - 5s 16ms/step - loss: 12.7895 - mean_squared_error: 12.7895
Epoch 69/80
293/293 [==============================] - 5s 18ms/step - loss: 15.7551 - mean_squared_error: 15.7551
Epoch 70/80
293/293 [==============================] - 5s 17ms/step - loss: 18.4030 - mean_squared_error: 18.4030
Epoch 71/80
293/293 [==============================] - 5s 16ms/step - loss: 16.0214 - mean_squared_error: 16.0214
Epoch 72/80
293/293 [==============================] - 5s 17ms/step - loss: 12.3694 - mean_squared_error: 12.3694
Epoch 73/80
293/293 [==============================] - 5s 17ms/step - loss: 10.5107 - mean_squared_error: 10.5107
Epoch 74/80
293/293 [==============================] - 5s 17ms/step - loss: 11.7746 - mean_squared_error: 11.7746
Epoch 75/80
293/293 [==============================] - 5s 16ms/step - loss: 11.6589 - mean_squared_error: 11.6589
Epoch 76/80
293/293 [==============================] - 5s 16ms/step - loss: 14.1691 - mean_squared_error: 14.1691
Epoch 77/80
293/293 [==============================] - 5s 16ms/step - loss: 15.9365 - mean_squared_error: 15.9365
Epoch 78/80
293/293 [==============================] - 5s 17ms/step - loss: 15.4616 - mean_squared_error: 15.4616
Epoch 79/80
293/293 [==============================] - 5s 17ms/step - loss: 13.2958 - mean_squared_error: 13.2958
Epoch 80/80
293/293 [==============================] - 5s 17ms/step - loss: 11.3826 - mean_squared_error: 11.3826
<keras.callbacks.History at 0x1b61bab69d0>
# Build a combined frame with the test-A rows first, followed by the train
# and dev-0 rows.
x_test = pd.read_csv('test-A/in.tsv', names=in_columns, sep='\t')
df_train = pd.read_csv('train/in.tsv', sep='\t', names=in_columns)
df2_train = pd.read_csv('dev-0/in.tsv', sep='\t', names=in_columns)
df_train = pd.concat((df_train, df2_train))
x_test = pd.concat((x_test, df_train))
len(x_test)
10080
# Drop the two columns that are not used as features.
x_test = x_test.drop(columns=['nazwa_stacji', 'typ_zbioru'])
len(x_test)
10080
# One-hot encode station id, year and month across every row of the
# combined frame.
x_test = pd.get_dummies(
    x_test,
    columns=['id_stacji', 'rok', 'miesiąc'],
)
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
595 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
596 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
597 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
598 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
599 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

10080 rows × 73 columns

# Keep only the test-A rows. They were concatenated first, so the rows that
# came from df_train form the tail of the frame; derive that count from
# df_train instead of hard-coding it. Written as an explicit end index so
# that an empty df_train keeps every row.
x_test = x_test.iloc[:len(x_test) - len(df_train)]
x_test
id_stacji_249180010 id_stacji_249190560 id_stacji_249200370 id_stacji_249200490 id_stacji_249220150 id_stacji_249220180 id_stacji_250190160 id_stacji_250190390 id_stacji_250210130 id_stacji_251170090 ... miesiąc_3 miesiąc_4 miesiąc_5 miesiąc_6 miesiąc_7 miesiąc_8 miesiąc_9 miesiąc_10 miesiąc_11 miesiąc_12
0 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
4 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
715 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
716 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
717 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
718 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
719 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

720 rows × 73 columns

# Run the fitted model on the test-A feature rows.
pred = model.predict(x_test)
23/23 [==============================] - 0s 4ms/step
# Run the model on the test-A feature rows and write the predictions as a
# tab-separated file with no header row and no index column.
pred = model.predict(x_test)
out = pd.DataFrame(pred)
out.to_csv('test-A/out.tsv', index=False, header=False, sep='\t')
23/23 [==============================] - 0s 4ms/step