import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import pickle

TRAIN_FILE_PATH = 'train/train.tsv'

# data preparation helpers
def read_data_file(filepath):
    df = pd.read_csv(filepath, sep='\t', header=None, index_col=None)
    # keep only the three columns the model uses: price (target), 'biggy' and 'type'
    dataframe = df.iloc[:, [0, 8, 11]].copy()
    dataframe.columns = ['price', 'biggy', 'type']

    # strip embedded spaces so the column can be parsed as a float
    dataframe['biggy'] = dataframe['biggy'].str.replace(" ", "", regex=False)

    dataframe['bias'] = 1
    dataframe['biggy'] = dataframe['biggy'].astype(float)
    return dataframe

def dataframe_to_arrays(dataframe):
    dataframe1 = dataframe.copy(deep=True)
    # alternative: integer-encode the category instead of one-hot encoding
    # dataframe1["type"] = dataframe1["type"].astype('category').cat.codes

    # one-hot encode the categorical 'type' column
    dataframe1 = pd.get_dummies(dataframe1, columns=['type'])
    print(dataframe1.columns)
    # first column ('price') is the target; every remaining column is an input feature
    input_cols = dataframe1.columns.values[1:]
    output_cols = dataframe1.columns.values[:1]
    # force a float array so the dummy columns (bool or uint8 depending on the
    # pandas version) do not produce an object array that torch cannot consume
    inputs_array = dataframe1[input_cols].to_numpy(dtype=np.float32)
    targets_array = dataframe1[output_cols].to_numpy(dtype=np.float32)
    return inputs_array, targets_array

data = read_data_file(TRAIN_FILE_PATH)

inputs_array_training, targets_array_training = dataframe_to_arrays(data)

inputs_training = torch.from_numpy(inputs_array_training).type(torch.float32)
# flatten the (N, 1) target column to shape (N,) so it matches the (N,) predictions
# in the training loop instead of broadcasting the difference to an (N, N) matrix
targets_training = torch.from_numpy(targets_array_training).type(torch.float32).squeeze(1)

# one starting weight per input column returned by dataframe_to_arrays
# (biggy, bias and the one-hot 'type' columns; nine in this dataset)
weights = torch.tensor([1.0, 300000.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], requires_grad=True)
learning_rate = torch.tensor(5e-13)

print("training started")

for i in range(300):
    # linear model: the predicted price is the dot product of features and weights
    y_predicted = inputs_training @ weights
    # sum-of-squared-errors cost
    cost = torch.sum((y_predicted - targets_training) ** 2)
    cost.backward()
    with torch.no_grad():
        weights -= learning_rate * weights.grad
        # clear the accumulated gradient, otherwise backward() keeps adding to it
        # on every iteration
        weights.grad.zero_()

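# The hand-written update above is plain full-batch gradient descent; a sketch of
# the same loop written with torch.optim (assuming the same weights tensor and data):
#
#   optimizer = torch.optim.SGD([weights], lr=5e-13)
#   for i in range(300):
#       optimizer.zero_grad()
#       cost = torch.sum((inputs_training @ weights - targets_training) ** 2)
#       cost.backward()
#       optimizer.step()
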
print(weights)

# write the learned weights to disk; the context manager closes the file handle
with open('model.pkl', 'wb') as f:
    pickle.dump(weights, f)
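
# Sketch of how the pickled weights could be reused later for a prediction
# (assumes a hypothetical feature_row tensor built with the same column order
# and dtype as the arrays returned by dataframe_to_arrays):
#
#   with open('model.pkl', 'rb') as f:
#       trained_weights = pickle.load(f)
#   estimated_price = feature_row @ trained_weights  # feature_row: torch.float32, length 9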