#!/usr/bin/env python
# coding: utf-8
"""Train a linear regression on the red-wine-quality data set.

The run is tracked with Sacred (file-storage and MongoDB observers) and with
MLflow.  After training, ten random validation predictions are printed and
the predictions for the whole validation split are written to ``result.txt``.
"""

import torch
import jovian
import torchvision
import matplotlib
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
import random
import os
import sys
from urllib.parse import urlparse
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sacred.observers import MongoObserver
import mlflow
import mlflow.keras
import mlflow.pytorch
from mlflow.models.signature import infer_signature


# --- Experiment tracking setup ---------------------------------------------

ex = Experiment(save_git_info=False)
ex.observers.append(FileStorageObserver('my_runs'))
# NOTE(review): credentials are hard-coded in the connection string below;
# consider reading them from the environment instead.
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))

mlflow.set_tracking_uri("http://172.17.0.1:5000")
mlflow.set_experiment('s444354')

# The number of epochs is taken from the first CLI argument; when it is
# missing or not an integer the default of 1500 is used.
# NOTE(review): Sacred also parses the command line itself - confirm that a
# bare positional number is accepted by its parser.
try:
    numberOfEpochParam = int(sys.argv[1])
except (IndexError, ValueError):
    numberOfEpochParam = 1500


# --- Data preparation -------------------------------------------------------

dataframe_raw = pd.read_csv("winequality-red.csv")

# Every column except the last one is a feature; 'quality' is the target.
input_cols = list(dataframe_raw.columns)[:-1]
output_cols = ['quality']


def dataframe_to_arrays(dataframe):
    """Split *dataframe* into feature and target NumPy arrays.

    Args:
        dataframe: DataFrame containing the columns listed in the module-level
            ``input_cols`` and ``output_cols``.

    Returns:
        A ``(inputs_array, targets_array)`` tuple built from a deep copy of
        *dataframe*, so the caller's frame is never touched.
    """
    # Use the argument (the original code silently read the global frame).
    dataframe1 = dataframe.copy(deep=True)
    inputs_array = dataframe1[input_cols].to_numpy()
    targets_array = dataframe1[output_cols].to_numpy()
    return inputs_array, targets_array


inputs_array, targets_array = dataframe_to_arrays(dataframe_raw)

inputs = torch.from_numpy(inputs_array).type(torch.float)
targets = torch.from_numpy(targets_array).type(torch.float)

dataset = TensorDataset(inputs, targets)

# 1300 samples are used for training, the remainder for validation
# (299 for the full 1599-row data set).
train_size = 1300
train_ds, val_ds = random_split(dataset, [train_size, len(dataset) - train_size])

batch_size = 50
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
lr = 1e-6


# --- Model ------------------------------------------------------------------

class WineQuality(nn.Module):
    """Single linear layer trained with an L1 loss.

    The layer dimensions come from the module-level ``input_size`` and
    ``output_size``, which must be defined before the class is instantiated.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Return the linear layer's output for the batch *xb*."""
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        """Return the L1 loss of the model on one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        loss = F.l1_loss(out, targets)
        return loss

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` (detached) for one validation batch."""
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        loss = F.l1_loss(out, targets)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses in *outputs* into one float."""
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss every 100th epoch and on the last one."""
        if (epoch+1) % 100 == 0 or epoch == num_epochs-1:
            print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))


input_size = len(input_cols)
output_size = len(output_cols)

model = WineQuality()


# --- Sacred configuration ---------------------------------------------------

@ex.config
def my_config():
    """Expose the module-level training objects as Sacred config entries."""
    # Epochs come from the CLI when given, otherwise from the default above.
    # NOTE(review): model and the data loaders are not plain JSON values -
    # confirm the observers can store them.
    epochs = numberOfEpochParam
    lr = lr
    model = model
    train_loader = train_loader
    val_loader = val_loader


# --- Training ---------------------------------------------------------------

def evaluate(model, val_loader):
    """Return ``{'val_loss': float}`` averaged over all batches of *val_loader*."""
    # Only the detached loss values are kept, so no graph needs to be built.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)


@ex.capture
def fit(epochs, lr, model, train_loader, val_loader, _log, _run, opt_func=torch.optim.SGD):
    """Train *model*, then log parameters and the model to Sacred and MLflow.

    Args:
        epochs: Number of passes over *train_loader*.
        lr: Learning rate passed to *opt_func*.
        model: Module providing ``training_step``, ``validation_step``,
            ``validation_epoch_end`` and ``epoch_end``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        _log: Logger injected by Sacred.
        _run: Run object injected by Sacred.
        opt_func: Optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        A list with one ``{'val_loss': float}`` dict per epoch.
    """
    _log.info("log info test ")
    history = []
    optimizer = opt_func(model.parameters(), lr)

    with mlflow.start_run():
        for epoch in range(epochs):
            for batch in train_loader:
                loss = model.training_step(batch)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            result = evaluate(model, val_loader)
            model.epoch_end(epoch, result, epochs)
            history.append(result)

        mlflow.log_param('epochs', epochs)
        mlflow.log_param('lr', lr)

        torch.save(model, 'saved_model.pth')
        ex.add_artifact("saved_model.pth")
        _run.info["epochs"] = epochs

        # Infer the model signature from a real training batch so that the
        # logged schema matches the actual feature count.
        sample_inputs, _ = next(iter(train_loader))
        with torch.no_grad():
            sample_outputs = model(sample_inputs)
        signature = infer_signature(sample_inputs.numpy(), sample_outputs.numpy())

        # Registering a model is only attempted when the tracking store is
        # not a plain local file store.
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        if tracking_url_type_store != "file":
            mlflow.pytorch.log_model(model, "model", registered_model_name="red-wine-quality", signature=signature)
        else:
            mlflow.pytorch.log_model(model, "model", signature=signature, input_example=sample_inputs[:1].numpy())

    return history


# --- Reporting --------------------------------------------------------------

def predict_single(input, target, model):
    """Return a one-line text report comparing *target* with the prediction.

    Args:
        input: Feature tensor of a single sample (without batch dimension).
        target: Expected value for the sample.
        model: Callable model applied to the sample.
    """
    inputs = input.unsqueeze(0)  # add the batch dimension
    predictions = model(inputs)
    prediction = predictions[0].detach()
    return "Target: "+str(target)+"----- Prediction: "+str(prediction)+"\n"


def report_predictions(model, val_ds, sample_count=10, path="result.txt"):
    """Print random sample predictions and write all of them to *path*.

    Args:
        model: Model used for the predictions.
        val_ds: Indexable data set of ``(input, target)`` pairs.
        sample_count: Number of random samples printed to stdout; capped at
            the size of *val_ds*.
        path: File that receives one report line per element of *val_ds*.
    """
    # Draw random prediction samples for a quick visual check.
    for i in random.sample(range(len(val_ds)), min(sample_count, len(val_ds))):
        input_, target = val_ds[i]
        print(predict_single(input_, target, model), end="")

    with open(path, "w") as file:
        for i in range(len(val_ds)):
            input_, target = val_ds[i]
            file.write(predict_single(input_, target, model))


@ex.automain
def main():
    """Train the model, then report its predictions on the validation split."""
    # The objects are passed explicitly so that the instance trained here is
    # the same one that is evaluated below.
    fit(model=model, train_loader=train_loader, val_loader=val_loader)
    # Reporting happens after training; at module level it would describe an
    # untrained model.
    report_predictions(model, val_ds)