# Listing metadata: 73 lines, 2.2 KiB, Python
import pandas
|
|
from sklearn.preprocessing import LabelEncoder
|
|
import torch
|
|
import numpy as np
|
|
|
|
def encoder(data):
    """Fill missing values and label-encode the text columns of *data*.

    Columns whose dtype compares equal to ``"object"`` get their missing
    entries replaced by the string ``'0'`` and are then mapped to integer
    codes by a fresh ``LabelEncoder``. Every other column gets its missing
    entries replaced by the number ``0``.

    Args:
        data: pandas DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in data.columns:
        if data.dtypes[col] == "object":
            # Assign the filled column back instead of calling
            # ``data[col].fillna(..., inplace=True)``: that chained in-place
            # form acts on a temporary under pandas Copy-on-Write and leaves
            # the NaNs in ``data``.
            filled = data[col].fillna('0')
            # NOTE: the encoder is fitted per column and per call, so the
            # same string can receive different codes in different frames.
            data[col] = LabelEncoder().fit_transform(filled)
        else:
            data[col] = data[col].fillna(0)
    return data
|
|
|
|
|
|
# --- Training set -----------------------------------------------------------
# ``on_bad_lines='warn'`` replaces the removed ``error_bad_lines=False``
# keyword: malformed rows are skipped and reported with a warning.
X_train = pandas.read_csv('train/train.tsv', on_bad_lines='warn', sep='\t', header=0)
# Column names for the header-less in.tsv files: every training column except
# the first one.
# NOTE(review): presumably the first training column is the target 'cena',
# which the in.tsv files do not contain - confirm against the data.
names = list(X_train)[1:]
X_train.drop('opis', axis=1, inplace=True)
# Split the target column off the feature frame.
y_train = X_train['cena']
X_train.drop('cena', axis=1, inplace=True)

X_train = encoder(X_train)

# --- Dev set ----------------------------------------------------------------
X_dev0 = pandas.read_csv('dev-0/in.tsv', sep='\t', header=None, names=names)
X_dev0.drop('opis', axis=1, inplace=True)
X_dev0 = encoder(X_dev0)
# Expected dev values are loaded here; nothing later in this script reads them.
y_dev0 = pandas.read_csv('dev-0/expected.tsv', sep='\t', header=None)

# --- Test set ---------------------------------------------------------------
X_testA = pandas.read_csv('test-A/in.tsv', sep='\t', header=None, names=names)
X_testA.drop('opis', axis=1, inplace=True)
X_testA = encoder(X_testA)
|
|
|
|
# Design matrix: the two selected feature columns followed by a column of
# ones, so the last entry of ``w`` plays the role of the intercept.
data_train = X_train[['Powierzchnia w m2', 'Liczba pokoi']].values
bias_train = np.ones((data_train.shape[0], 1), dtype=data_train.dtype)
data_train = np.hstack((data_train, bias_train))

x = torch.tensor(data_train, dtype=torch.float)
y = torch.tensor(y_train, dtype=torch.float)
w = torch.tensor([0, 0, 0], dtype=torch.float, requires_grad=True)
learning_rate = torch.tensor(0.00000001)

# Plain gradient descent on the mean squared error.
n_samples = y.size()[0]
for _ in range(100000):
    y_predicted = x @ w
    cost = ((y_predicted - y) ** 2).sum() / n_samples
    cost.backward()
    # Build the next weights outside the autograd graph, then turn them back
    # into a leaf that tracks gradients; the new tensor starts with no
    # accumulated gradient.
    w = (w.detach() - learning_rate * w.grad).requires_grad_(True)

# Freeze the weights so later predictions can be converted with ``.numpy()``.
w.requires_grad_(False)
|
|
|
|
|
|
|
|
# Dev set: same two feature columns plus the column of ones, then predict and
# write one rounded value per line.
data_dev = X_dev0[['Powierzchnia w m2', 'Liczba pokoi']].values
ones_dev = np.ones((data_dev.shape[0], 1), dtype=data_dev.dtype)
data_dev = np.concatenate((data_dev, ones_dev), axis=1)
x_dev = torch.tensor(data_dev, dtype=torch.float)
y_dev_pred = x_dev @ w
np.savetxt('./dev-0/out.tsv', y_dev_pred.numpy(), '%.0f')

# Test set: identical steps.
data_testA = X_testA[['Powierzchnia w m2', 'Liczba pokoi']].values
ones_testA = np.ones((data_testA.shape[0], 1), dtype=data_testA.dtype)
data_testA = np.concatenate((data_testA, ones_testA), axis=1)
x_test = torch.tensor(data_testA, dtype=torch.float)
y_test_pred = x_test @ w
np.savetxt('./test-A/out.tsv', y_test_pred.numpy(), '%.0f')
|