reduced classes

Adam Wojdyla 2022-04-24 14:17:28 +02:00
parent 80974027c7
commit 863a84ac18


@@ -8,6 +8,7 @@ from sklearn.metrics import accuracy_score
 import torch.nn.functional as F
 import pandas as pd
 from sklearn import preprocessing
+import matplotlib.pyplot as plt


 class Model(nn.Module):
@@ -16,7 +17,7 @@ class Model(nn.Module):
         self.layer1 = nn.Linear(input_dim, 160)
         # self.layer2 = nn.Linear(320, 160)
         self.layer2 = nn.Linear(160, 80)
-        self.layer3 = nn.Linear(80, 23)
+        self.layer3 = nn.Linear(80, 5)

     def forward(self, x):
         x = F.relu(self.layer1(x))
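The only change in this hunk is the classifier head: the last Linear layer now emits 5 logits instead of 23, matching the five car makes kept after filtering. Below is a minimal sketch of the same idea with the output size passed in rather than hard-coded; this is my own variant for illustration, not code from the commit (the commit keeps the literal 5).

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self, input_dim, output_dim=5):
        # output_dim must equal the number of label classes fed to CrossEntropyLoss
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 160)
        self.layer2 = nn.Linear(160, 80)
        self.layer3 = nn.Linear(80, output_dim)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)  # raw logits; CrossEntropyLoss applies the softmax internally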
@@ -25,44 +26,63 @@ class Model(nn.Module):
         return x


-def load_dataset():
+def load_dataset_raw():
     """ Load data from .csv file. """
     cars = pd.read_csv('./Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
-    # cars = cars.iloc()
     return cars


-def prepare_dataset(dataset):
+def remove_rows(dataset):
+    # dataset.drop(dataset[dataset['mark'] == 'alfa-romeo'].index, inplace=True)
+    # dataset.drop(dataset[dataset['mark'] == 'chevrolet'].index, inplace=True)
+    # dataset.drop(dataset[dataset['mark'] == 'mitsubishi'].index, inplace=True)
+    # dataset.drop(dataset[dataset['mark'] == 'mini'].index, inplace=True)
+    # audi bmw ford opel volkswagen
+    new_data = dataset.loc[(dataset['mark'] == 'audi') | (dataset['mark'] == 'bmw') | (dataset['mark'] == 'ford') | (dataset['mark'] == 'opel') | (dataset['mark'] == 'volkswagen')]
+    return new_data
+    # dataset = dataset.drop(dataset)
+    # return dataset
+
+
+def prepare_dataset_raw(dataset):
     """ Label make column"""
     le = preprocessing.LabelEncoder()
     mark_column = np.array(dataset[:]['mark'])
     le.fit(mark_column)
     print(list(le.classes_))
-    labels = le.transform(mark_column)
-    features = dataset.drop(['mark'], axis=1).to_numpy()
+    lab = le.transform(mark_column)
+    feat = dataset.drop(['mark'], axis=1).to_numpy()

     mm_scaler = preprocessing.MinMaxScaler()
-    features = mm_scaler.fit_transform(features)
+    feat = mm_scaler.fit_transform(feat)

-    return labels, features
+    return lab, feat


 # Prepare dataset
-dataset = load_dataset()
-labels, features = prepare_dataset(dataset)
-features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=42, shuffle=True)
-#
-# import matplotlib
-#
-# plt = matplotlib.pyplot.hist(features, 16)
+print("Loading dataset...")
+dataset = load_dataset_raw()
+print("Dataset loaded")
+
+print("Preparing dataset...")
+dataset = remove_rows(dataset)
+labels, features = prepare_dataset_raw(dataset)
+print("Dataset prepared")
+
+plot = plt.hist(labels, bins=[i for i in range(len(set(labels)))], edgecolor="black")
+plt.xticks(np.arange(0, len(set(labels)), 1))
+plt.show()
+
+features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=42,
+                                                                            shuffle=True)

 # Training
 model = Model(features_train.shape[1])
 optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
 loss_fn = nn.CrossEntropyLoss()
-epochs = 1000
+epochs = 100

+print("Starting model training...")
 x_train, y_train = Variable(torch.from_numpy(features_train)).float(), Variable(torch.from_numpy(labels_train)).long()
 for epoch in range(1, epochs + 1):
     print("Epoch #", epoch)
@@ -74,21 +94,23 @@ for epoch in range(1, epochs + 1):
     optimizer.zero_grad()
     loss.backward()  # Gradients
     optimizer.step()  # Update
-print(1)
+print("Model training finished")

 x_test = Variable(torch.from_numpy(features_test)).float()
 pred = model(x_test)
 pred = pred.detach().numpy()
+print(pred)
 print("The accuracy is", accuracy_score(labels_test, np.argmax(pred, axis=1)))

 # Checking for first value
-print(np.argmax(model(x_test[0]).detach().numpy(), axis=0))
-print(labels_test[0])
+# print(np.argmax(model(x_test[0]).detach().numpy(), axis=0))
+# print(labels_test[0])

-torch.save(model, "iris-pytorch.pkl")
-saved_model = torch.load("iris-pytorch.pkl")
+print("Saving model to file...")
+torch.save(model, "CarPrices_pytorch.pkl")
+print("Model saved with name: CarPrices_pytorch.pkl")
+saved_model = torch.load("CarPrices_pytorch.pkl")
 print(np.argmax(saved_model(x_test[0]).detach().numpy(), axis=0))
+
+pd_predictions = pd.DataFrame(pred)
+pd_predictions.to_csv("./prediction_results.csv")
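Since prepare_dataset_raw fits a LabelEncoder on the filtered 'mark' column, the five remaining makes map to the indices 0 through 4 that the 5-unit output layer and the histogram ticks rely on. A standalone sketch of that mapping (my illustration, not output from the commit; LabelEncoder orders classes alphabetically):

from sklearn import preprocessing

le = preprocessing.LabelEncoder()
le.fit(["audi", "bmw", "ford", "opel", "volkswagen"])
print(list(le.classes_))               # ['audi', 'bmw', 'ford', 'opel', 'volkswagen']
print(le.transform(["ford", "audi"]))  # [2 0]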