"""Train a linear model on a TSV data set using manual gradient descent.

The script reads three columns from the training file, fits a weight vector
by minimising the sum of squared errors, and pickles the learned weights.
"""
import pickle
import sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

TRAIN_FILE_PATH = 'train/train.tsv'
MODEL_FILE_PATH = 'model.pkl'

# Hyper-parameters taken verbatim from the original script.
# NOTE(review): the step size was chosen while the training loop still had
# gradient-accumulation and broadcasting bugs -- it may need re-tuning.
INITIAL_WEIGHTS = (7201.61492633873, 1, 7201.500)
LEARNING_RATE = 0.000000000005
ITERATIONS = 10000


# prepare data methods
def read_data_file(filepath) -> pd.DataFrame:
    """Load the TSV at *filepath* and return the columns used for training.

    Columns 0, 8 and 11 of the header-less file are kept and renamed to
    ``price``, ``biggy`` and ``type``.  Spaces are stripped from ``biggy``
    before it is converted to float, and a constant ``bias`` column of ones
    is appended.

    Args:
        filepath: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        A new DataFrame with columns ``price``, ``biggy``, ``type``, ``bias``.
    """
    df = pd.read_csv(filepath, sep='\t', header=None, index_col=None)
    # Explicit copy: assigning into a bare ``iloc`` slice is chained
    # assignment, which warns and is a no-op under pandas copy-on-write.
    dataframe = df.iloc[:, [0, 8, 11]].copy()
    dataframe.columns = ['price', 'biggy', 'type']
    # Vectorised replacement for the former per-row loop; ``astype(str)``
    # keeps this working when pandas already parsed the column as numeric.
    dataframe['biggy'] = (
        dataframe['biggy']
        .astype(str)
        .str.replace(" ", "", regex=False)
        .astype(float)
    )
    dataframe['bias'] = 1
    return dataframe


def dataframe_to_arrays(dataframe, input_cols=None, output_cols=None) -> tuple:
    """Split *dataframe* into NumPy input and target arrays.

    The ``type`` column is replaced by its integer category codes on a deep
    copy, so the caller's frame is left untouched.

    Args:
        dataframe: Frame containing a ``type`` column plus the requested
            input and output columns.
        input_cols: Column labels used as model inputs.  Defaults to every
            column except the first.
        output_cols: Column labels used as targets.  Defaults to the first
            column only.

    Returns:
        ``(inputs_array, targets_array)``, each a 2-D array with one row per
        row of *dataframe*.
    """
    if input_cols is None:
        input_cols = list(dataframe.columns[1:])
    if output_cols is None:
        output_cols = list(dataframe.columns[:1])

    dataframe1 = dataframe.copy(deep=True)
    dataframe1["type"] = dataframe1["type"].astype('category').cat.codes
    inputs_array = dataframe1[list(input_cols)].to_numpy()
    targets_array = dataframe1[list(output_cols)].to_numpy()
    return inputs_array, targets_array


def train_linear_model(inputs, targets, initial_weights=INITIAL_WEIGHTS,
                       learning_rate=LEARNING_RATE, iterations=ITERATIONS):
    """Fit ``inputs @ w ~= targets`` by plain gradient descent.

    The cost is the sum of squared errors over all rows.

    Args:
        inputs: Float tensor of shape ``(n_rows, n_features)``.
        targets: Float tensor with ``n_rows`` elements, shaped either
            ``(n_rows,)`` or ``(n_rows, 1)``.
        initial_weights: Starting weights, one per feature.
        learning_rate: Gradient-descent step size.
        iterations: Number of full-batch update steps.

    Returns:
        The learned weight tensor (``requires_grad=True``, float32).

    Raises:
        ValueError: If *targets* does not hold exactly one value per row of
            *inputs*.
    """
    # Flatten so the residual is (n_rows,).  Subtracting an (n_rows, 1)
    # target from an (n_rows,) prediction would broadcast to
    # (n_rows, n_rows) and silently compute the wrong cost.
    flat_targets = targets.reshape(-1)
    if flat_targets.shape[0] != inputs.shape[0]:
        raise ValueError(
            "targets must contain exactly one value per input row"
        )

    w = torch.tensor(list(initial_weights), dtype=torch.float32,
                     requires_grad=True)
    for _ in range(iterations):
        y_predicted = inputs @ w
        cost = torch.sum((y_predicted - flat_targets) ** 2)
        cost.backward()
        with torch.no_grad():
            w -= learning_rate * w.grad
            # backward() accumulates into .grad; reset it so the next step
            # uses only the gradient of the current cost.
            w.grad.zero_()
    return w


def main():
    """Read the training data, fit the model and pickle the weights."""
    data = read_data_file(TRAIN_FILE_PATH)
    inputs_array_training, targets_array_training = dataframe_to_arrays(data)
    inputs_training = torch.from_numpy(inputs_array_training).type(torch.float32)
    targets_training = torch.from_numpy(targets_array_training).type(torch.float32)
    print(inputs_training)

    print("training started")
    w = train_linear_model(inputs_training, targets_training)
    print(w)

    # Context manager guarantees the file is flushed and closed.
    with open(MODEL_FILE_PATH, 'wb') as model_file:
        pickle.dump(w, model_file)


if __name__ == "__main__":
    main()