diff --git a/Dockerfile b/Dockerfile index 46cd70a..36b2be5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM ubuntu RUN apt-get update && apt-get install -y python3 python3-pip unzip -RUN python3 -m pip install pandas numpy tensorflow imbalanced-learn sklearn sacred pymongo +RUN python3 -m pip install pandas numpy tensorflow imbalanced-learn sklearn sacred pymongo mlflow RUN apt-get install -y git COPY train.py /app/train.py @@ -12,6 +12,6 @@ COPY data.csv /app/data.csv WORKDIR /app RUN export SACRED_IGNORE_GIT=TRUE -RUN python3 train.py +RUN mlflow run . -P epochs=10 -CMD ["python3", "predictions.py"] +CMD ["python3", "predictions.py"] \ No newline at end of file diff --git a/JenkinsfileDL b/JenkinsfileDL index 3cedb39..1733eab 100644 --- a/JenkinsfileDL +++ b/JenkinsfileDL @@ -14,7 +14,7 @@ pipeline { stages { stage('Preparation') { steps { - sh 'pip install pandas tensorflow scikit-learn imbalanced-learn sacred pymongo' + sh 'pip install pandas tensorflow scikit-learn imbalanced-learn sacred pymongo mlflow' } } stage('Pobierz dane') { @@ -27,13 +27,17 @@ pipeline { stage('Trenuj model') { steps { script { - sh "python3 train.py" //--epochs $EPOCHS + sh 'mlflow run . -P epochs=$EPOCHS' } } } stage('Zarchiwizuj model') { steps { - archiveArtifacts artifacts: 'model.h5', fingerprint: true + sh ''' + mkdir -p model + cp -r mlruns/* model/ + ''' + archiveArtifacts artifacts: 'model/**', fingerprint: true } } } diff --git a/train.py b/train.py index 72ff481..3447d1b 100644 --- a/train.py +++ b/train.py @@ -1,16 +1,17 @@ from sacred import Experiment from sacred.observers import MongoObserver, FileStorageObserver import os +import mlflow +import mlflow.keras os.environ["SACRED_NO_GIT"] = "1" ex = Experiment('s487187-training', interactive=True, save_git_info=False) ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred')) - @ex.config def my_config(): - data_file = 'data.csv' + data_file = 'data.csv' model_file = 'model.h5' epochs = 10 batch_size = 32 @@ -25,45 +26,45 @@ def train_model(data_file, model_file, epochs, batch_size, test_size, random_sta import tensorflow as tf from imblearn.over_sampling import SMOTE - smote = SMOTE(random_state=random_state) - data = pd.read_csv(data_file, sep=';') + with mlflow.start_run(): - print('Total rows:', len(data)) - print('Rows with medal:', len(data.dropna(subset=['Medal']))) + smote = SMOTE(random_state=random_state) + data = pd.read_csv(data_file, sep=';') - data = pd.get_dummies(data, columns=['Sex', 'Medal']) - data = data.drop(columns=['Name', 'Team', 'NOC', 'Games', 'Year', 'Season', 'City', 'Sport', 'Event']) + data = pd.get_dummies(data, columns=['Sex', 'Medal']) + data = data.drop(columns=['Name', 'Team', 'NOC', 'Games', 'Year', 'Season', 'City', 'Sport', 'Event']) - scaler = MinMaxScaler() - data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns) + scaler = MinMaxScaler() + data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns) - X = data.filter(regex='Sex|Age') - y = data.filter(regex='Medal') - y = pd.get_dummies(y) + X = data.filter(regex='Sex|Age') + y = data.filter(regex='Medal') + y = pd.get_dummies(y) - X = X.fillna(0) - y = y.fillna(0) + X = X.fillna(0) + y = y.fillna(0) - y = y.values + y = y.values - X_resampled, y_resampled = smote.fit_resample(X, y) - X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=test_size, random_state=random_state) + X_resampled, y_resampled = smote.fit_resample(X, y) + X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=test_size, random_state=random_state) - model = tf.keras.models.Sequential() - model.add(tf.keras.layers.Dense(64, input_dim=X_train.shape[1], activation='relu')) - model.add(tf.keras.layers.Dense(32, activation='relu')) - model.add(tf.keras.layers.Dense(y.shape[1], activation='softmax')) + model = tf.keras.models.Sequential() + model.add(tf.keras.layers.Dense(64, input_dim=X_train.shape[1], activation='relu')) + model.add(tf.keras.layers.Dense(32, activation='relu')) + model.add(tf.keras.layers.Dense(y.shape[1], activation='softmax')) - model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) + model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) - model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size) - loss, accuracy = model.evaluate(X_test, y_test) - print('Test accuracy:', accuracy) - print('Test loss:', loss) + model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size) + loss, accuracy = model.evaluate(X_test, y_test) - model.save(model_file) + mlflow.log_metric("loss", loss) + mlflow.log_metric("accuracy", accuracy) - return accuracy + mlflow.keras.save_model(model, model_file) + + return accuracy @ex.main def run_experiment(): @@ -71,4 +72,4 @@ def run_experiment(): ex.log_scalar('accuracy', accuracy) ex.add_artifact('model.h5') -ex.run() \ No newline at end of file +ex.run()