mieszkania5/train.py
Michal Maciaszek bb5d311df5 first try
2020-12-08 20:14:00 +01:00

58 lines
1.8 KiB
Python

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import sys
import torch.nn.functional as F
import pickle
from torch.utils.data import TensorDataset, DataLoader
# Tab-separated training data; the path is relative to the working directory.
TRAIN_FILE_PATH = 'train/train.tsv'
# Data preparation helpers.
def read_data_file(filepath):
    """Load the training TSV and return the columns used by the model.

    Reads a header-less, tab-separated file and keeps columns 0, 8 and 11,
    renamed to ``price``, ``biggy`` and ``type``. Spaces are removed from
    ``biggy`` (e.g. ``"1 234.5"`` becomes ``1234.5``) before it is converted
    to float, and a constant ``bias`` column of ones is appended.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A new DataFrame with columns ``price``, ``biggy``, ``type``, ``bias``.
    """
    raw = pd.read_csv(filepath, sep='\t', header=None, index_col=None)
    # Take an explicit copy so the column assignments below operate on an
    # independent frame instead of a selection of ``raw``.
    dataframe = raw.iloc[:, [0, 8, 11]].copy()
    dataframe.columns = ['price', 'biggy', 'type']
    # Vectorised replacement for the former per-row chained assignment
    # (``dataframe['biggy'].loc[x] = ...``), which pandas documents as
    # unreliable. ``astype(str)`` also covers files where the column was
    # already parsed as numeric and therefore has no ``str.replace``.
    dataframe['biggy'] = (
        dataframe['biggy']
        .astype(str)
        .str.replace(' ', '', regex=False)
        .astype(float)
    )
    dataframe['bias'] = 1
    return dataframe
def dataframe_to_arrays(dataframe, feature_cols=None, target_cols=None):
    """Split a prepared DataFrame into NumPy input and target arrays.

    The ``type`` column is replaced by integer category codes on a deep copy,
    so the caller's DataFrame is left untouched.

    Args:
        dataframe: DataFrame containing a ``type`` column plus every selected
            column.
        feature_cols: Column labels used as model inputs. Defaults to the
            module-level ``input_cols``.
        target_cols: Column labels used as targets. Defaults to the
            module-level ``output_cols``.

    Returns:
        Tuple ``(inputs_array, targets_array)`` of NumPy arrays.
    """
    # Fall back to the script-level column selections only when the caller
    # did not pass explicit ones, keeping the old one-argument call working.
    if feature_cols is None:
        feature_cols = input_cols
    if target_cols is None:
        target_cols = output_cols
    encoded = dataframe.copy(deep=True)
    # NOTE(review): codes are derived from the categories present in this
    # frame, so another dataset may map the same label to a different code.
    encoded["type"] = encoded["type"].astype('category').cat.codes
    inputs_array = encoded[feature_cols].to_numpy()
    targets_array = encoded[target_cols].to_numpy()
    return inputs_array, targets_array
# --- Training script: fit a linear model price ~ biggy, type, bias ---
data = read_data_file(TRAIN_FILE_PATH)
# Column 0 is the target; every remaining column is a model input.
input_cols = data.columns.values[1:]
output_cols = data.columns.values[:1]
inputs_array_training, targets_array_training = dataframe_to_arrays(data)
inputs_training = torch.from_numpy(inputs_array_training).type(torch.float32)
targets_training = torch.from_numpy(targets_array_training).type(torch.float32)
# ``targets_training`` is (N, 1) while ``inputs_training @ w`` is (N,).
# Subtracting them directly broadcasts to an (N, N) matrix, so the loss would
# sum over every prediction/target pair. Use a flat (N,) view for the loss.
targets_flat = targets_training.reshape(-1)
print(inputs_training)
# One weight per input column, in ``input_cols`` order.
w = torch.tensor([7201.61492633873, 1, 7201.500], requires_grad=True)
# NOTE(review): this step size was chosen while the loss was broadcast to
# (N, N) and gradients accumulated across steps; it may need retuning.
learning_rate = torch.tensor(0.000000000005)
print("training started")
for _ in range(10000):
    y_predicted = inputs_training @ w
    cost = torch.sum((y_predicted - targets_flat) ** 2)
    cost.backward()
    with torch.no_grad():
        w -= learning_rate * w.grad
        # backward() adds into w.grad; clear it so the next step uses only
        # the gradient of the current iteration.
        w.grad.zero_()
print(w)
# Close the file deterministically instead of leaking the handle.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(w, model_file)