import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
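
# Load the train and test splits from CSV.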
train_dataset = pd.read_csv('train_dataset_dvc.csv')
test_dataset = pd.read_csv('test_dataset_dvc.csv')
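
# Separate the features from the 'No-show' target column.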
X_train = train_dataset.drop(columns=['No-show']).to_numpy()
X_test = test_dataset.drop(columns=['No-show']).to_numpy()
y_train = train_dataset['No-show'].to_numpy()
y_test = test_dataset['No-show'].to_numpy()
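

# Logistic regression: a single linear layer followed by a sigmoid activation.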
class LogisticRegression(torch.nn.Module):

    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        outputs = torch.sigmoid(self.linear(x))
        return outputs
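

# Hyperparameters; input_dim must match the number of feature columns.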
epochs = 50_000
input_dim = 9
output_dim = 1
learning_rate = 0.01

model = LogisticRegression(input_dim, output_dim)
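
# Binary cross-entropy loss; BCELoss expects probabilities in [0, 1],
# which the sigmoid in forward() provides.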
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
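
# torch.Tensor(...) converts the numpy arrays to float32 tensors, the dtype
# BCELoss expects for both predictions and targets.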
X_train, X_test = torch.Tensor(X_train), torch.Tensor(X_test)
y_train, y_test = torch.Tensor(y_train), torch.Tensor(y_test)

losses = []
losses_test = []
Iterations = []
iter = 0
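
# Full-batch training: each epoch runs the whole training set through the model once;
# train/test loss and accuracy are recorded every 10,000 iterations.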
for epoch in tqdm(range(int(epochs)), desc='Training Epochs'):
    x = X_train
    labels = y_train

    optimizer.zero_grad()  # Setting our stored gradients equal to zero
    outputs = model(x)
    loss = criterion(torch.squeeze(outputs), labels)

    loss.backward()  # Computes the gradient of the given tensor w.r.t. the weights/bias
    optimizer.step()  # Updates weights and biases with the optimizer (SGD)

    iter += 1
    if iter % 10000 == 0:
        with torch.no_grad():
            # Calculating the loss and accuracy for the test dataset
            correct_test = 0
            total_test = 0
            outputs_test = torch.squeeze(model(X_test))
            loss_test = criterion(outputs_test, y_test)

            predicted_test = outputs_test.round().detach().numpy()
            total_test += y_test.size(0)
            correct_test += np.sum(predicted_test == y_test.detach().numpy())
            accuracy_test = 100 * correct_test / total_test
            losses_test.append(loss_test.item())

            # Calculating the loss and accuracy for the train dataset
            total = 0
            correct = 0
            total += y_train.size(0)
            correct += np.sum(torch.squeeze(outputs).round().detach().numpy() == y_train.detach().numpy())
            accuracy = 100 * correct / total
            losses.append(loss.item())
            Iterations.append(iter)

            print(f"Iteration: {iter}. \nTest - Loss: {loss_test.item()}. Accuracy: {accuracy_test}")
            print(f"Train - Loss: {loss.item()}. Accuracy: {accuracy}\n")
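
# Append the final train loss/accuracy to both log files.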
with open("logs.txt", "a") as myfile:
    myfile.write(f"loss={loss.item()}, accuracy={accuracy}\n")

with open("logs_dvc.txt", "a") as myfile:
    myfile.write(f"loss={loss.item()}, accuracy={accuracy}\n")
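
# A minimal sketch (not in the original script) of plotting the recorded train/test
# loss curves with the matplotlib/seaborn imports above; the output filename
# 'loss_curve.png' is an assumption.
sns.set_theme()
plt.figure(figsize=(8, 5))
plt.plot(Iterations, losses, label='train loss')
plt.plot(Iterations, losses_test, label='test loss')
plt.xlabel('Iteration')
plt.ylabel('BCE loss')
plt.legend()
plt.savefig('loss_curve.png')  # assumed output path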