This commit is contained in:
parent
be740d8b71
commit
3d6749570d
@ -6,7 +6,7 @@ from torch import nn
|
||||
from torch.autograd import Variable
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import accuracy_score
|
||||
from sklearn.metrics import accuracy_score, f1_score
|
||||
import torch.nn.functional as F
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
@ -33,28 +33,31 @@ def load_dataset_raw():
|
||||
return cars
|
||||
|
||||
|
||||
def remove_rows(dataset):
|
||||
# dataset.drop(dataset[dataset['mark'] == 'alfa-romeo'].index, inplace=True)
|
||||
# dataset.drop(dataset[dataset['mark'] == 'chevrolet'].index, inplace=True)
|
||||
# dataset.drop(dataset[dataset['mark'] == 'mitsubishi'].index, inplace=True)
|
||||
# dataset.drop(dataset[dataset['mark'] == 'mini'].index, inplace=True)
|
||||
# audi bmw ford opel volkswagen
|
||||
def load_dataset_files():
|
||||
""" Load shuffled, splitted dev and train files from .csv files. """
|
||||
|
||||
new_data = dataset.loc[(dataset['mark'] == 'audi') | (dataset['mark'] == 'bmw') | (dataset['mark'] == 'ford') | (dataset['mark'] == 'opel') | (dataset['mark'] == 'volkswagen')]
|
||||
return new_data
|
||||
# dataset = dataset.drop(dataset)
|
||||
# return dataset
|
||||
cars_dev = pd.read_csv('./Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
|
||||
cars_train = pd.read_csv('./Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
|
||||
|
||||
return cars_dev, cars_train
|
||||
|
||||
|
||||
def prepare_dataset_raw(dataset):
|
||||
def remove_rows(data_dev, data_train):
    """Keep only the rows whose brand (column ``'0'``) is one of five makes.

    Args:
        data_dev: DataFrame holding the dev split; must have a ``'0'`` column.
        data_train: DataFrame holding the train split; must have a ``'0'``
            column.

    Returns:
        A ``(dev_removed_rows, train_removed_rows)`` tuple with the filtered
        frames. Original index labels are preserved.
    """
    # Single source of truth for the retained brands; previously this list
    # was spelled out twice as a chain of `==` comparisons.
    kept_brands = ['audi', 'bmw', 'ford', 'opel', 'volkswagen']

    dev_removed_rows = data_dev.loc[data_dev['0'].isin(kept_brands)]
    train_removed_rows = data_train.loc[data_train['0'].isin(kept_brands)]

    return dev_removed_rows, train_removed_rows
|
||||
|
||||
|
||||
def prepare_labels_features(dataset):
|
||||
""" Label make column"""
|
||||
le = preprocessing.LabelEncoder()
|
||||
mark_column = np.array(dataset[:]['mark'])
|
||||
mark_column = np.array(dataset[:]['0'])
|
||||
le.fit(mark_column)
|
||||
|
||||
print(list(le.classes_))
|
||||
lab = le.transform(mark_column)
|
||||
feat = dataset.drop(['mark'], axis=1).to_numpy()
|
||||
feat = dataset.drop(['0'], axis=1).to_numpy()
|
||||
|
||||
mm_scaler = preprocessing.MinMaxScaler()
|
||||
feat = mm_scaler.fit_transform(feat)
|
||||
@ -62,6 +65,9 @@ def prepare_dataset_raw(dataset):
|
||||
return lab, feat
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# def draw_plot(lbl):
|
||||
# need to import matplotlib to work
|
||||
# plt.hist(lbl, bins=[i for i in range(len(set(lbl)))], edgecolor="black")
|
||||
@ -70,17 +76,15 @@ def prepare_dataset_raw(dataset):
|
||||
|
||||
# Prepare dataset
|
||||
print("Loading dataset...")
|
||||
dataset = load_dataset_raw()
|
||||
dev, train = load_dataset_files()
|
||||
print("Dataset loaded")
|
||||
|
||||
print("Preparing dataset...")
|
||||
dataset = remove_rows(dataset)
|
||||
labels, features = prepare_dataset_raw(dataset)
|
||||
dev, train = remove_rows(dev, train)
|
||||
labels_train, features_train = prepare_labels_features(train)
|
||||
labels_test, features_test = prepare_labels_features(dev)
|
||||
print("Dataset prepared")
|
||||
|
||||
|
||||
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=42,
|
||||
shuffle=True)
|
||||
# Training
|
||||
model = Model(features_train.shape[1])
|
||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
|
||||
@ -92,7 +96,7 @@ try:
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Setting default epochs value to 1000.")
|
||||
epochs = 1000
|
||||
epochs = 100
|
||||
|
||||
print(f"Number of epochs: {epochs}")
|
||||
|
||||
@ -113,7 +117,8 @@ print("Model training finished")
|
||||
x_test = Variable(torch.from_numpy(features_test)).float()
|
||||
pred = model(x_test)
|
||||
pred = pred.detach().numpy()
|
||||
print("The accuracy is", accuracy_score(labels_test, np.argmax(pred, axis=1)))
|
||||
print(f"The accuracy metric is: {accuracy_score(labels_test, np.argmax(pred, axis=1))}")
|
||||
|
||||
|
||||
# Checking for first value
|
||||
# print(np.argmax(model(x_test[0]).detach().numpy(), axis=0))
|
||||
|
75
lab06_evaluation.py
Normal file
75
lab06_evaluation.py
Normal file
@ -0,0 +1,75 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import numpy as np
|
||||
from torch.autograd import Variable
|
||||
from sklearn.metrics import accuracy_score, f1_score
|
||||
from csv import DictWriter
|
||||
import torch.nn.functional as F
|
||||
import sys
|
||||
|
||||
class Model(nn.Module):
    """Three-layer fully connected classifier with five output classes.

    Layer sizes are ``input_dim -> 100 -> 60 -> 5`` with ReLU activations
    between the linear layers and a softmax over the final layer.
    """

    def __init__(self, input_dim):
        """Create the layers.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 100)
        self.layer2 = nn.Linear(100, 60)
        self.layer3 = nn.Linear(60, 5)

    def forward(self, x):
        """Return per-class probabilities for ``x``.

        Args:
            x: Float tensor whose last dimension has ``input_dim`` entries.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 5 that sums to 1.
        """
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # Softmax over the class axis. The dimension is given explicitly
        # because the implicit choice is deprecated and emits a warning.
        # NOTE(review): the original comment said "To check with the loss
        # function" - confirm the training loss expects probabilities and
        # not raw logits.
        x = F.softmax(self.layer3(x), dim=-1)
        return x
|
||||
|
||||
def prepare_labels_features(dataset):
    """Split ``dataset`` into encoded labels and min-max scaled features.

    Column ``'0'`` is label-encoded and used as the target; every other
    column is converted to a NumPy array and min-max scaled. The label
    classes found are printed.

    Args:
        dataset: DataFrame with a ``'0'`` column plus the feature columns.

    Returns:
        A ``(lab, feat)`` tuple: the integer-encoded labels and the scaled
        feature matrix.
    """
    brand_names = np.array(dataset['0'])

    encoder = preprocessing.LabelEncoder()
    encoder.fit(brand_names)
    print(list(encoder.classes_))
    encoded_labels = encoder.transform(brand_names)

    raw_features = dataset.drop(columns=['0']).to_numpy()
    # NOTE(review): the scaler is fit on whatever frame is passed in, so a
    # dev frame is scaled by its own min/max - confirm this matches how the
    # training features were scaled.
    scaled_features = preprocessing.MinMaxScaler().fit_transform(raw_features)

    return encoded_labels, scaled_features
|
||||
|
||||
|
||||
def print_metrics(test_labels, predictions):
    """Print weighted F1 and accuracy, and append them to ``metrics.csv``.

    Args:
        test_labels: True class indices, one per sample.
        predictions: 2-D array of per-class scores, one row per sample; the
            predicted class is the column with the highest score.

    Failure to write ``metrics.csv`` is reported on stdout and does not
    raise; the metrics are still printed.
    """
    # Take the column with the max predicted score as the predicted class.
    predicted_classes = np.argmax(predictions, axis=1)

    # Use the `test_labels` argument; the previous code read the module-level
    # `labels_test` here and only worked when the caller passed that global.
    f1 = f1_score(test_labels, predicted_classes, average='weighted')
    accuracy = accuracy_score(test_labels, predicted_classes)
    print(f"The F1_score metric is: {f1}")
    print(f"The accuracy metric is: {accuracy}")

    # NOTE(review): the build number is hard-coded to 1; reading it from
    # sys.argv[1] was commented out in the original.
    build_number = 1
    field_names = ['BUILD_NUMBER', 'F1', 'ACCURACY']
    metrics_row = {'BUILD_NUMBER': build_number, 'F1': f1, 'ACCURACY': accuracy}

    try:
        # newline='' is what the csv module expects for files it writes; the
        # `with` block closes the file, so no explicit close() is needed.
        with open('metrics.csv', 'a', newline='') as metrics_file:
            DictWriter(metrics_file, fieldnames=field_names).writerow(metrics_row)
    except OSError as e:
        # Best effort: losing the CSV row must not abort the evaluation.
        print(e)
|
||||
|
||||
|
||||
# Evaluate the saved model on the dev split and report its metrics.

# NOTE(review): torch.load unpickles the whole model object, which can run
# arbitrary code - only load files from a trusted source. Newer PyTorch
# releases may also need weights_only=False to load a fully pickled model;
# confirm against the installed version.
model = torch.load("CarPrices_pytorch_model.pkl")
model.eval()

cars_dev = pd.read_csv(
    './Car_Prices_Poland_Kaggle_dev.csv',
    usecols=[1, 4, 5, 6, 10],
    sep=',',
    names=[str(i) for i in range(5)],
)
# Keep only the five brands the model has output classes for.
cars_dev = cars_dev.loc[cars_dev['0'].isin(['audi', 'bmw', 'ford', 'opel', 'volkswagen'])]
labels_test, features_test = prepare_labels_features(cars_dev)

# Plain tensors replace the deprecated Variable wrapper; no gradients are
# needed for inference.
x_test = torch.from_numpy(features_test).float()
with torch.no_grad():
    pred = model(x_test)
pred = pred.numpy()
print_metrics(labels_test, pred)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user