In [1]:
# Import required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense

In [2]:
in_columns = ['id_stacji', 'nazwa_stacji', 'typ_zbioru', 'rok', 'miesiąc']

df = pd.read_csv('train/in.tsv', names=in_columns, sep='\t')
len(df)

8760

In [3]:
df_test = pd.read_csv('test-A/in.tsv', names=in_columns, sep='\t')
len(df_test)

720

In [4]:
df = pd.concat([df,df_test])
len(df)

9480

In [5]:
df = df.drop(['nazwa_stacji','typ_zbioru'], axis=1)

In [6]:
x = pd.get_dummies(df,columns = ['id_stacji','rok','miesiąc'])
x

Unnamed: 0,id_stacji_249180010,id_stacji_249190560,id_stacji_249200370,id_stacji_249200490,id_stacji_249220150,id_stacji_249220180,id_stacji_250190160,id_stacji_250190390,id_stacji_250210130,id_stacji_251170090,...,miesiąc_3,miesiąc_4,miesiąc_5,miesiąc_6,miesiąc_7,miesiąc_8,miesiąc_9,miesiąc_10,miesiąc_11,miesiąc_12
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
716,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
717,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
718,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [7]:
x = x.iloc[:-720]

In [8]:
x

Unnamed: 0,id_stacji_249180010,id_stacji_249190560,id_stacji_249200370,id_stacji_249200490,id_stacji_249220150,id_stacji_249220180,id_stacji_250190160,id_stacji_250190390,id_stacji_250210130,id_stacji_251170090,...,miesiąc_3,miesiąc_4,miesiąc_5,miesiąc_6,miesiąc_7,miesiąc_8,miesiąc_9,miesiąc_10,miesiąc_11,miesiąc_12
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8756,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
8757,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
8758,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [9]:
y = pd.read_csv('train/expected.tsv', sep='\t', names=['rainfall'])
#y = np.array(y).reshape(1,-1)
y

Unnamed: 0,rainfall
0,19.4
1,43.2
2,72.2
3,25.3
4,89.3
...,...
8755,114.9
8756,101.2
8757,20.4
8758,93.2


In [10]:
# Define model
model = Sequential()
model.add(Dense(16, input_dim=73, activation= "relu"))
model.add(Dense(32, activation= "relu"))
model.add(Dense(64, activation= "relu"))
model.add(Dense(32, activation= "relu"))
model.add(Dense(16, activation= "relu"))
model.add(Dense(1))
#model.summary() #Print model Summary

In [11]:
df['id_stacji'] = np.asarray(df['id_stacji']).astype('float32')
df['rok'] = np.asarray(df['rok']).astype('float32')
df['miesiąc'] = np.asarray(df['miesiąc']).astype('float32')

In [12]:
y = np.asarray(y).astype('float32')

In [13]:
[print(i.shape, i.dtype) for i in model.inputs]
[print(o.shape, o.dtype) for o in model.outputs]
[print(l.name, l.input_shape, l.dtype) for l in model.layers]

(None, 73) <dtype: 'float32'>
(None, 1) <dtype: 'float32'>
dense (None, 73) float32
dense_1 (None, 16) float32
dense_2 (None, 32) float32
dense_3 (None, 64) float32
dense_4 (None, 32) float32
dense_5 (None, 16) float32


[None, None, None, None, None, None]

In [14]:
model.compile(loss= "mean_squared_error" , optimizer="adam", metrics=["mean_squared_error"])
model.fit(x, y, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x2ae269e2610>

In [15]:
import math
math.sqrt(298.7904)

17.28555466278129

In [16]:
x_test = pd.read_csv('test-A/in.tsv', sep='\t', names=in_columns)
#y_test = pd.read_csv('dev-0/expected.tsv', sep='\t',names=['rainfall'])
#x_test = x_test.drop(['nazwa_stacji', 'typ_zbioru'],axis=1)
df_train = pd.read_csv('train/in.tsv', names=in_columns, sep='\t')

In [17]:
x_test = pd.concat([x_test,df_train])
len(x_test)

9480

In [18]:
x_test = x_test.drop(['nazwa_stacji', 'typ_zbioru'],axis=1)
len(x_test)

9480

In [19]:
x_test = pd.get_dummies(x_test,columns = ['id_stacji','rok','miesiąc'])
x_test

Unnamed: 0,id_stacji_249180010,id_stacji_249190560,id_stacji_249200370,id_stacji_249200490,id_stacji_249220150,id_stacji_249220180,id_stacji_250190160,id_stacji_250190390,id_stacji_250210130,id_stacji_251170090,...,miesiąc_3,miesiąc_4,miesiąc_5,miesiąc_6,miesiąc_7,miesiąc_8,miesiąc_9,miesiąc_10,miesiąc_11,miesiąc_12
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8756,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
8757,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
8758,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [20]:
x_test = x_test.iloc[:-8760]
x_test

Unnamed: 0,id_stacji_249180010,id_stacji_249190560,id_stacji_249200370,id_stacji_249200490,id_stacji_249220150,id_stacji_249220180,id_stacji_250190160,id_stacji_250190390,id_stacji_250210130,id_stacji_251170090,...,miesiąc_3,miesiąc_4,miesiąc_5,miesiąc_6,miesiąc_7,miesiąc_8,miesiąc_9,miesiąc_10,miesiąc_11,miesiąc_12
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
716,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
717,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
718,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [21]:
pred= model.predict(x_test)



In [22]:
pred= model.predict(x_test)
out = pd.DataFrame(pred)
out.to_csv('test-A/out.tsv',sep='\t',header=False, index=False)

