"""Fit a linear model of 'cena' by gradient descent and predict dev-0.

The model uses the columns listed in FEATURE_COLUMNS plus a constant bias
term. Inputs are read from 'train/train.tsv', 'dev-0/in.tsv' and
'test-A/in.tsv'; the dev-0 predictions end up in ``y_dev_pred`` and the
test-A inputs in ``x_test``.
"""
import pandas
from pandas.api.types import is_object_dtype, is_string_dtype
from sklearn.preprocessing import LabelEncoder
import torch
import numpy as np

# Columns fed to the linear model; a column of ones is appended as the bias.
FEATURE_COLUMNS = ['Powierzchnia w m2', 'Liczba pokoi']


def encoder(data):
    """Fill missing values and label-encode the text columns of *data*.

    Text columns (object or string dtype) get missing entries replaced by the
    string ``'0'`` and are then mapped to integer codes by a ``LabelEncoder``
    fitted on that column alone. Every other column gets missing entries
    replaced by the number ``0``.

    Args:
        data: DataFrame to clean. Its columns are overwritten in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in data.columns:
        column = data[col]
        # Assign the filled column back instead of calling
        # ``fillna(inplace=True)`` on ``data[col]``: that form works on a
        # selection of the frame, which pandas 2.2 warns about and which
        # leaves the frame untouched once Copy-on-Write is enabled.
        if is_object_dtype(column) or is_string_dtype(column):
            data[col] = LabelEncoder().fit_transform(column.fillna('0'))
        else:
            data[col] = column.fillna(0)
    return data


def _read_train_table(path):
    """Read the tab-separated training file, warning on and skipping bad rows."""
    try:
        # ``on_bad_lines='warn'`` matches what ``error_bad_lines=False`` did
        # with its default warning setting; the old keyword is gone in
        # pandas 2.0.
        return pandas.read_csv(path, sep='\t', header=0, on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines``; use the older spelling.
        return pandas.read_csv(path, sep='\t', header=0, error_bad_lines=False)


def _with_bias(frame):
    """Return the FEATURE_COLUMNS of *frame* as an array plus a column of ones."""
    features = frame[FEATURE_COLUMNS].values
    ones = np.ones((features.shape[0], 1), dtype=features.dtype)
    return np.hstack((features, ones))


# --- Training data -----------------------------------------------------------
X_train = _read_train_table('train/train.tsv')
# The in.tsv files have no header row; they are read with every training
# column name except the first one.
names = list(X_train)[1:]
X_train.drop('opis', axis=1, inplace=True)
y_train = X_train.pop('cena')
X_train = encoder(X_train)

# --- dev-0 and test-A inputs -------------------------------------------------
X_dev0 = pandas.read_csv('dev-0/in.tsv', sep='\t', header=None, names=names)
X_dev0.drop('opis', axis=1, inplace=True)
X_dev0 = encoder(X_dev0)
# NOTE(review): loaded but not used anywhere in the visible script.
y_dev0 = pandas.read_csv('dev-0/expected.tsv', sep='\t', header=None)

X_testA = pandas.read_csv('test-A/in.tsv', sep='\t', header=None, names=names)
X_testA.drop('opis', axis=1, inplace=True)
X_testA = encoder(X_testA)

# --- Gradient descent on the mean squared error -------------------------------
data_train = _with_bias(X_train)
x = torch.tensor(data_train, dtype=torch.float)
# Hand torch the underlying array; indexing a Series element by element
# depends on its index labels being 0..n-1.
y = torch.tensor(y_train.values, dtype=torch.float)

w = torch.tensor([0, 0, 0], dtype=torch.float, requires_grad=True)
learning_rate = torch.tensor(0.00000001)

for _ in range(100000):
    y_predicted = x @ w
    cost = torch.sum((y_predicted - y) ** 2) / y.size()[0]
    cost.backward()
    with torch.no_grad():
        # Update the leaf tensor in place so no new tensor is created per step.
        w -= learning_rate * w.grad
    # Drop the gradient so the next backward() does not accumulate onto it.
    w.grad = None

# Training is over: detach w from autograd so predictions can be turned into
# NumPy arrays.
w.requires_grad_(False)

# --- Prediction inputs ---------------------------------------------------------
data_dev = _with_bias(X_dev0)
x_dev = torch.tensor(data_dev, dtype=torch.float)

data_testA = _with_bias(X_testA)
x_test = torch.tensor(data_testA, dtype=torch.float)

y_dev_pred = x_dev @ w
# Write the dev-0 predictions as text, formatted with no decimal places.
np.savetxt('./dev-0/out.tsv', y_dev_pred.numpy(), fmt='%.0f')

# Apply the same weights to the test-A inputs and save them in the same format.
y_test_pred = x_test.matmul(w)
np.savetxt('./test-A/out.tsv', y_test_pred.numpy(), fmt='%.0f')