diff --git a/DeepLearning/lab05_deepLearning.py b/lab05_deepLearning.py
similarity index 52%
rename from DeepLearning/lab05_deepLearning.py
rename to lab05_deepLearning.py
index 1f49020..d07d302 100644
--- a/DeepLearning/lab05_deepLearning.py
+++ b/lab05_deepLearning.py
@@ -8,6 +8,7 @@ from sklearn.metrics import accuracy_score
 import torch.nn.functional as F
 import pandas as pd
 from sklearn import preprocessing
+import matplotlib.pyplot as plt
 
 
 class Model(nn.Module):
@@ -16,7 +17,7 @@ class Model(nn.Module):
         self.layer1 = nn.Linear(input_dim, 160)
         # self.layer2 = nn.Linear(320, 160)
         self.layer2 = nn.Linear(160, 80)
-        self.layer3 = nn.Linear(80, 23)
+        self.layer3 = nn.Linear(80, 5)
 
     def forward(self, x):
         x = F.relu(self.layer1(x))
@@ -25,44 +26,63 @@ class Model(nn.Module):
         return x
 
 
-def load_dataset():
+def load_dataset_raw():
     """ Load data from .csv file. """
     cars = pd.read_csv('./Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
-    # cars = cars.iloc()
     return cars
 
 
-def prepare_dataset(dataset):
+def remove_rows(dataset):
+    # dataset.drop(dataset[dataset['mark'] == 'alfa-romeo'].index, inplace=True)
+    # dataset.drop(dataset[dataset['mark'] == 'chevrolet'].index, inplace=True)
+    # dataset.drop(dataset[dataset['mark'] == 'mitsubishi'].index, inplace=True)
+    # dataset.drop(dataset[dataset['mark'] == 'mini'].index, inplace=True)
+    # audi bmw ford opel volkswagen
+
+    new_data = dataset.loc[(dataset['mark'] == 'audi') | (dataset['mark'] == 'bmw') | (dataset['mark'] == 'ford') | (dataset['mark'] == 'opel') | (dataset['mark'] == 'volkswagen')]
+    return new_data
+    # dataset = dataset.drop(dataset)
+    # return dataset
+
+
+def prepare_dataset_raw(dataset):
     """ Label make column"""
     le = preprocessing.LabelEncoder()
     mark_column = np.array(dataset[:]['mark'])
     le.fit(mark_column)
     print(list(le.classes_))
-    labels = le.transform(mark_column)
-    features = dataset.drop(['mark'], axis=1).to_numpy()
+    lab = le.transform(mark_column)
+    feat = dataset.drop(['mark'], axis=1).to_numpy()
 
     mm_scaler = preprocessing.MinMaxScaler()
-    features = mm_scaler.fit_transform(features)
+    feat = mm_scaler.fit_transform(feat)
 
-    return labels, features
+    return lab, feat
 
 
 # Prepare dataset
-dataset = load_dataset()
-labels, features = prepare_dataset(dataset)
-features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=42, shuffle=True)
-#
-# import matplotlib
-#
-# plt = matplotlib.pyplot.hist(features, 16)
+print("Loading dataset...")
+dataset = load_dataset_raw()
+print("Dataset loaded")
+print("Preparing dataset...")
+dataset = remove_rows(dataset)
+labels, features = prepare_dataset_raw(dataset)
+print("Dataset prepared")
+
+plot = plt.hist(labels, bins=[i for i in range(len(set(labels)))], edgecolor="black")
+plt.xticks(np.arange(0, len(set(labels)), 1))
+plt.show()
+features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=42,
+                                                                            shuffle=True)
 
 # Training
 model = Model(features_train.shape[1])
 optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
 loss_fn = nn.CrossEntropyLoss()
-epochs = 1000
+epochs = 100
+print("Starting model training...")
 
 x_train, y_train = Variable(torch.from_numpy(features_train)).float(), Variable(torch.from_numpy(labels_train)).long()
 for epoch in range(1, epochs + 1):
     print("Epoch #", epoch)
@@ -74,21 +94,23 @@ for epoch in range(1, epochs + 1):
     optimizer.zero_grad()
     loss.backward()  # Gradients
     optimizer.step()  # Update
-
-print(1)
+print("Model training finished")
 
 x_test = Variable(torch.from_numpy(features_test)).float()
 pred = model(x_test)
-
 pred = pred.detach().numpy()
-print(pred)
-
 print("The accuracy is", accuracy_score(labels_test, np.argmax(pred, axis=1)))
 
 # Checking for first value
-print(np.argmax(model(x_test[0]).detach().numpy(), axis=0))
-print(labels_test[0])
-torch.save(model, "iris-pytorch.pkl")
+# print(np.argmax(model(x_test[0]).detach().numpy(), axis=0))
+# print(labels_test[0])
 
-saved_model = torch.load("iris-pytorch.pkl")
+print("Saving model to file...")
+torch.save(model, "CarPrices_pytorch.pkl")
+print("Model saved with name: CarPrices_pytorch.pkl")
+
+saved_model = torch.load("CarPrices_pytorch.pkl")
 print(np.argmax(saved_model(x_test[0]).detach().numpy(), axis=0))
+
+pd_predictions = pd.DataFrame(pred)
+pd_predictions.to_csv("./prediction_results.csv")
\ No newline at end of file
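
A minimal inference sketch for the saved CarPrices_pytorch.pkl, separate from the commit above. It assumes the Model class from this script is importable where the file is loaded, that new samples are scaled with the same MinMaxScaler fitted on the training data, and that the four feature columns selected from Car_Prices_Poland_Kaggle.csv are year, mileage, vol_engine and price; the sample values are made up.

    # Sketch only: reload the pickled model and classify one already-scaled sample.
    import numpy as np
    import torch

    # torch.save(model, ...) pickles the whole module, so the Model class must be
    # importable here; on recent PyTorch versions a full-module pickle may also
    # require torch.load(..., weights_only=False).
    saved_model = torch.load("CarPrices_pytorch.pkl")
    saved_model.eval()

    # Hypothetical sample, MinMax-scaled to [0, 1]: [year, mileage, vol_engine, price].
    sample = torch.from_numpy(np.array([[0.85, 0.10, 0.30, 0.25]], dtype=np.float32))

    with torch.no_grad():
        class_index = int(torch.argmax(saved_model(sample), dim=1))

    # LabelEncoder sorts labels alphabetically, so with the five retained makes the
    # expected mapping is 0=audi, 1=bmw, 2=ford, 3=opel, 4=volkswagen.
    print("Predicted make index:", class_index)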