diff --git a/Jenkinsfile_train b/Jenkinsfile_train
index 76a6275..bd918cd 100644
--- a/Jenkinsfile_train
+++ b/Jenkinsfile_train
@@ -31,7 +31,7 @@ pipeline {
 
             steps {
                 catchError {
-                    sh 'python3.8 Zadanie_06_training.py ${BATCH_SIZE} ${EPOCHS}'
+                    sh 'python3.8 Zadanie_06_and_07_training.py ${BATCH_SIZE} ${EPOCHS}'
                 }
             }
         }
diff --git a/MLproject b/MLproject
new file mode 100644
index 0000000..e788445
--- /dev/null
+++ b/MLproject
@@ -0,0 +1,11 @@
+name: 434788-mlflow
+
+docker_env:
+  image: snowycocoon/ium_434788:4
+
+entry_points:
+  main:
+    parameters:
+      batch_size: {type: int, default: 16}
+      epochs: {type: int, default: 15}
+    command: "python3 Zadanie_08_MLflow.py {batch_size} {epochs}"
\ No newline at end of file
diff --git a/Zadanie_05_Docker.py b/Zadanie_05_ML.py
similarity index 100%
rename from Zadanie_05_Docker.py
rename to Zadanie_05_ML.py
diff --git a/Zadanie_06_training.py b/Zadanie_06_and_07_training.py
similarity index 100%
rename from Zadanie_06_training.py
rename to Zadanie_06_and_07_training.py
diff --git a/Zadanie_08_MLflow.py b/Zadanie_08_MLflow.py
new file mode 100644
index 0000000..ef5fd7b
--- /dev/null
+++ b/Zadanie_08_MLflow.py
@@ -0,0 +1,90 @@
+from tensorflow.keras.models import Sequential, load_model
+from tensorflow.keras.layers import Dense
+from sklearn.metrics import accuracy_score, classification_report
+import pandas as pd
+from sklearn.model_selection import train_test_split
+import numpy as np
+import sys
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from tensorflow.keras.optimizers import Adam
+from sacred.observers import FileStorageObserver, MongoObserver
+from sacred import Experiment
+from datetime import datetime
+import os
+import sys
+import mlflow
+
+# Train a binary wine-quality classifier on train.csv and record the
+# hyperparameters and resulting accuracy in an MLflow run.
+# Usage: python3 Zadanie_08_MLflow.py [batch_size] [epochs]
+with mlflow.start_run():
+
+    # Hyperparameters are positional CLI arguments (the MLproject entry point
+    # passes "{batch_size} {epochs}"); fall back to defaults when absent.
+    batch_param = int(sys.argv[1]) if len(sys.argv) > 1 else 16
+    epoch_param = int(sys.argv[2]) if len(sys.argv) > 2 else 5
+
+    # Keys are plain identifiers: the previous "batch_size: " / "epochs: "
+    # names contained a colon and a trailing space, which MLflow's name
+    # validation does not accept in every release.
+    mlflow.log_param("batch_size", batch_param)
+    mlflow.log_param("epochs", epoch_param)
+
+    wine = pd.read_csv('train.csv')
+
+    y = wine['quality']
+    x = wine.drop('quality', axis=1)
+
+    # Engineered interaction features built from the raw columns.
+    citricacid = x['fixed acidity'] * x['citric acid']
+    citric_acidity = pd.DataFrame(citricacid, columns=['citric_accidity'])
+
+    density_acidity = x['fixed acidity'] * x['density']
+    density_acidity = pd.DataFrame(density_acidity, columns=['density_acidity'])
+
+    # Join onto x, not wine: wine still holds the 'quality' column the label
+    # is derived from, so joining onto it fed the answer to the model.
+    x = x.join(citric_acidity).join(density_acidity)
+
+    # Binarize quality: (2, 5] -> 'bad', (5, 8] -> 'nice'. Values outside
+    # (2, 8] fall in no bin and become NaN.
+    # NOTE(review): assumes every quality in train.csv lies in 3..8 - confirm.
+    bins = (2, 5, 8)
+    gnames = ['bad', 'nice']
+    y = pd.cut(y, bins=bins, labels=gnames)
+
+    enc = LabelEncoder()
+    yenc = enc.fit_transform(y)
+
+    scale = StandardScaler()
+    scaled_x = scale.fit_transform(x)
+
+    # The input width follows the feature matrix rather than the hard-coded
+    # 14, which presumably counted the leaked 'quality' column.
+    NeuralModel = Sequential([
+        Dense(128, activation='relu', input_shape=(scaled_x.shape[1],)),
+        Dense(32, activation='relu'),
+        Dense(64, activation='relu'),
+        Dense(64, activation='relu'),
+        Dense(64, activation='relu'),
+        Dense(1, activation='sigmoid')
+    ])
+
+    # `learning_rate` is the current keyword; `lr` is a deprecated alias.
+    optimizer = Adam(learning_rate=0.0003)
+
+    NeuralModel.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
+
+    NeuralModel.fit(scaled_x, yenc, batch_size=batch_param, epochs=epoch_param)
+
+    NeuralModel.save('wine_model.h5')
+
+    # For the purposes of this assignment only: accuracy is computed on the
+    # same data the model was trained on.
+    y_pred = NeuralModel.predict(scaled_x)
+
+    y_pred = np.around(y_pred, decimals=0)
+
+    results = accuracy_score(yenc, y_pred)
+
+    mlflow.log_metric("accuracy", results)
\ No newline at end of file