created separate zad1 file with mlflow so it doesn't break the whole pipeline
This commit is contained in:
parent
d5014fbb40
commit
734794bed8
131
zad1.ipynb
131
zad1.ipynb
File diff suppressed because one or more lines are too long
58
zad1.py
58
zad1.py
@ -1,24 +1,6 @@
|
||||
import pandas as pd
|
||||
import sklearn.model_selection
|
||||
import mlflow
|
||||
import mlflow.sklearn
|
||||
import numpy as np
|
||||
import logging
|
||||
|
||||
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description='IUM script')
|
||||
parser.add_argument('--num_epochs', type=int, default=10, help='Number of epochs')
|
||||
parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
|
||||
parser.add_argument('--alpha', type=float, default=0.001, help='Learning rate')
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(level=logging.WARN)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
mlflow.set_tracking_uri("http://localhost:5000")
|
||||
mlflow.set_experiment("s487176")
|
||||
|
||||
import requests
|
||||
|
||||
url = "https://huggingface.co/datasets/mstz/wine/raw/main/Wine_Quality_Data.csv"
|
||||
@ -105,31 +87,30 @@ model = TabularModel(input_dim, hidden_dim, output_dim)
|
||||
criterion = nn.CrossEntropyLoss()
|
||||
optimizer = torch.optim.Adam(model.parameters())
|
||||
|
||||
num_epochs = args.num_epochs
|
||||
lr = args.lr
|
||||
alpha = args.alpha
|
||||
num_epochs = 10
|
||||
lr = 0.001
|
||||
alpha = 0.001
|
||||
model = TabularModel(input_dim=len(wine_train.columns)-1, hidden_dim=hidden_dim, output_dim=output_dim)
|
||||
criterion = nn.CrossEntropyLoss()
|
||||
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=alpha)
|
||||
with mlflow.start_run():
|
||||
mlflow.log_params({"learning rate":lr,"alpha":alpha})
|
||||
|
||||
|
||||
for epoch in range(num_epochs):
|
||||
running_loss = 0.0
|
||||
for i, data in enumerate(train_dataloader, 0):
|
||||
inputs, labels = data
|
||||
labels = labels.type(torch.LongTensor)
|
||||
optimizer.zero_grad()
|
||||
outputs = model(inputs)
|
||||
loss = criterion(outputs, labels)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
running_loss += loss.item()
|
||||
|
||||
# Print the loss every 1000 mini-batches
|
||||
if (epoch%2) == 0:
|
||||
print(f'Epoch {epoch + 1}, loss: {running_loss / len(train_dataloader):.4f}')
|
||||
for epoch in range(num_epochs):
|
||||
running_loss = 0.0
|
||||
for i, data in enumerate(train_dataloader, 0):
|
||||
inputs, labels = data
|
||||
labels = labels.type(torch.LongTensor)
|
||||
optimizer.zero_grad()
|
||||
outputs = model(inputs)
|
||||
loss = criterion(outputs, labels)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
running_loss += loss.item()
|
||||
|
||||
# Print the loss every 1000 mini-batches
|
||||
if (epoch%2) == 0:
|
||||
print(f'Epoch {epoch + 1}, loss: {running_loss / len(train_dataloader):.4f}')
|
||||
|
||||
print('Finished Training')
|
||||
|
||||
@ -145,6 +126,3 @@ with torch.no_grad():
|
||||
|
||||
accuracy= 100 * correct / total
|
||||
print('Accuracy on test set: %d %%' % accuracy)
|
||||
|
||||
mlflow.log_metric("test_accuracy", accuracy)
|
||||
mlflow.sklearn.log_model(model, "model")
|
150
zad1_mlflow.py
Normal file
150
zad1_mlflow.py
Normal file
@ -0,0 +1,150 @@
|
||||
# Command-line arguments, logging, and MLflow experiment-tracking setup.
import pandas as pd
import sklearn.model_selection
import mlflow
import mlflow.sklearn
import numpy as np
import logging

import argparse

parser = argparse.ArgumentParser(description='IUM script')
parser.add_argument('--num_epochs', type=int, default=10, help='Number of epochs')
parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
# FIX: help text previously said 'Learning rate' (copy-paste error); alpha is
# passed to Adam as weight_decay (L2 regularization) further down the script.
parser.add_argument('--alpha', type=float, default=0.001,
                    help='Weight decay (L2 regularization) coefficient')
args = parser.parse_args()

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

# Track runs on the local MLflow server under the student's experiment name.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("s487176")
|
||||
|
||||
# Download the wine-quality dataset and scale every column to [-1, 1].
import requests

url = "https://huggingface.co/datasets/mstz/wine/raw/main/Wine_Quality_Data.csv"
save_path = "Wine_Quality_Data.csv"

# FIX: added a timeout so a hung connection cannot block the pipeline forever.
response = requests.get(url, timeout=60)
response.raise_for_status()

with open(save_path, "wb") as f:
    f.write(response.content)

# FIX: read back via save_path instead of repeating the file-name literal.
wine_dataset = pd.read_csv(save_path)
# Binary target: red -> 1, white -> 0.
wine_dataset['color'] = wine_dataset['color'].replace({'red': 1, 'white': 0})
for column in wine_dataset.columns:
    # Divide each column by its max absolute value; the 0/1 label is unchanged
    # because its max abs is 1.
    wine_dataset[column] = wine_dataset[column] / wine_dataset[column].abs().max()  # normalization
|
||||
|
||||
|
||||
# Stratified 90/10 train/holdout split, then split the holdout 50/50 into
# test and validation sets; random_state is pinned for reproducibility.
from sklearn.model_selection import train_test_split

wine_train, wine_test = sklearn.model_selection.train_test_split(
    wine_dataset,
    test_size=0.1,
    random_state=1,
    stratify=wine_dataset["color"],
)
wine_train["color"].value_counts()
# Split into train and test.

wine_test["color"].value_counts()

wine_test, wine_val = sklearn.model_selection.train_test_split(
    wine_test,
    test_size=0.5,
    random_state=1,
    stratify=wine_test["color"],
)  # split into test and validation

wine_test["color"].value_counts()

wine_val["color"].value_counts()
|
||||
|
||||
# Plotting theme plus the PyTorch imports used by the dataset/model code below.
import seaborn as sns

sns.set_theme()

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
|
||||
|
||||
|
||||
class TabularDataset(Dataset):
    """Expose a DataFrame as a torch Dataset of (features, label) pairs.

    The last column of the frame is the label; every preceding column is a
    feature. All values are converted to float32 once, up front.
    """

    def __init__(self, data):
        # .values yields the frame's underlying numpy array.
        self.data = data.values.astype('float32')

    def __getitem__(self, index):
        row = self.data[index]
        features = torch.tensor(row[:-1])
        label = torch.tensor(row[-1])
        return features, label

    def __len__(self):
        return len(self.data)
|
||||
|
||||
|
||||
# Mini-batch loaders: only the training stream is shuffled.
batch_size = 64

train_dataset = TabularDataset(wine_train)
test_dataset = TabularDataset(wine_test)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
|
||||
|
||||
|
||||
class TabularModel(nn.Module):
    """Two-layer MLP classifier for tabular data.

    forward() returns raw logits of shape (batch, output_dim); predict()
    returns the argmax class index per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(TabularModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # FIX: the original applied nn.Softmax here and then fed the result to
        # nn.CrossEntropyLoss, which applies log-softmax internally — the
        # double softmax flattens gradients and degrades training.
        # CrossEntropyLoss expects raw logits, so the softmax layer was
        # removed. predict() is unaffected: argmax is invariant under softmax.
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

    def predict(self, x):
        # Inference helper: class index with the highest logit.
        with torch.no_grad():
            output = self.forward(x)
            _, predicted = torch.max(output, 1)
            return predicted
|
||||
|
||||
# Model/optimizer construction. The last column of wine_train is the label,
# so the feature count is (number of columns - 1).
input_dim = wine_train.shape[1] - 1
hidden_dim = 32
output_dim = 2

# Hyperparameters come from the CLI (argparse setup at the top of the file).
num_epochs = args.num_epochs
lr = args.lr
alpha = args.alpha

# FIX: the original constructed model/criterion/optimizer twice; the first
# triple was immediately shadowed and never used, so the dead copies are
# removed. (len(wine_train.columns)-1 equals input_dim above.)
model = TabularModel(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)
criterion = nn.CrossEntropyLoss()
# alpha doubles as Adam's weight_decay (L2 regularization) coefficient.
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=alpha)
|
||||
# Train, evaluate, and log params/metrics/model to MLflow in a single run.
# NOTE(review): the scraped source lost indentation; this assumes the whole
# train/eval/log sequence ran inside `with mlflow.start_run():`, which is
# required for log_metric/log_model to attach to the same run — confirm
# against the original file.
import mlflow.pytorch

with mlflow.start_run():
    mlflow.log_params({"learning rate": lr, "alpha": alpha})

    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(train_dataloader, 0):
            inputs, labels = data
            # CrossEntropyLoss requires integer class indices.
            labels = labels.type(torch.LongTensor)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Print the mean epoch loss every second epoch.
        if (epoch % 2) == 0:
            print(f'Epoch {epoch + 1}, loss: {running_loss / len(train_dataloader):.4f}')

    print('Finished Training')

    # Accuracy on the held-out test split; no gradients needed for inference.
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_dataloader:
            inputs, labels = data
            predicted = model.predict(inputs.float())
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print('Accuracy on test set: %d %%' % accuracy)

    mlflow.log_metric("test_accuracy", accuracy)
    # FIX: model is a torch nn.Module, not an sklearn estimator —
    # mlflow.sklearn.log_model would record a broken sklearn flavor for it,
    # so the pytorch flavor is used instead.
    mlflow.pytorch.log_model(model, "model")
|
Loading…
Reference in New Issue
Block a user