Move model training out of the Docker image build and into a parameterised
Jenkins pipeline.

* Dockerfile: stop running /model.py while the image is built.
* jenkins/training.Jenkinsfile: new pipeline that checks out the repository,
  copies the *.csv artifacts from the s478841-create-dataset job, runs
  model.py with the "epochs" and "save_model" parameters, archives the
  predictions and the scripted model, and e-mails the build status.
* scripts/model.py: add the -e/--epochs and --save command-line options and
  optionally save the trained model as TorchScript.

NOTE(review): the blob ids on the "index" lines were recorded before the
in-patch fixes (string(...) parameter syntax, emailext step name, corrected
--save status message and help text, type=int for --epochs, save-then-print
order); regenerate the patch if exact blob ids matter.

diff --git a/Dockerfile b/Dockerfile
index 2a763ee..c4176b6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,8 +23,4 @@ RUN chmod +x /load_data.sh
 RUN /load_data.sh
 
 RUN chmod +x /grab_avocado.py
-RUN python3 /grab_avocado.py
-
-# Run the model and train it
-RUN chmod +x /model.py
-RUN python3 /model.py
\ No newline at end of file
+RUN python3 /grab_avocado.py
\ No newline at end of file
diff --git a/jenkins/training.Jenkinsfile b/jenkins/training.Jenkinsfile
new file mode 100644
index 0000000..f5586f7
--- /dev/null
+++ b/jenkins/training.Jenkinsfile
@@ -0,0 +1,61 @@
+pipeline {
+    agent {
+        dockerfile true
+    }
+    parameters {
+        string(
+            defaultValue: '5',
+            description: 'epochs number',
+            name: 'epochs'
+        ),
+        string(
+            defaultValue: '--save',
+            description: 'save model after training',
+            name: 'save_model'
+        )
+    }
+    stages {
+        stage('Checkout') {
+            steps {
+                checkout([$class: 'GitSCM', branches: [[name: '*/develop']], extensions: [], userRemoteConfigs: [
+                    [url: 'https://git.wmi.amu.edu.pl/s478841/ium_478841.git']]])
+            }
+        }
+        stage('Copy Artifacts') {
+            steps {
+                copyArtifacts filter: '*.csv', fingerprintArtifacts: true, projectName: 's478841-create-dataset', selector: lastSuccessful()
+            }
+        }
+        stage('Model training') {
+            steps {
+                sh "chmod +x -R ${env.WORKSPACE}"
+                sh 'python model.py -e $epochs $save_model'
+            }
+        }
+        stage('Archive artifacts') {
+            steps {
+
+                archiveArtifacts artifacts: '*data/predictions.csv', onlyIfSuccessful: true
+                archiveArtifacts artifacts: '*data/model_scripted*', onlyIfSuccessful: true
+            }
+        }
+    }
+
+    post {
+        success {
+            emailext body: 'SUCCESS', subject: "s478841-training", to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
+        }
+
+        failure {
+            emailext body: 'FAILURE', subject: "s478841-training", to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
+        }
+
+        unstable {
+            emailext body: 'UNSTABLE', subject: "s478841-training", to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
+        }
+
+        changed {
+            emailext body: 'CHANGED', subject: "s478841-training", to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
+        }
+    }
+}
diff --git a/scripts/model.py b/scripts/model.py
index cd2bd46..4352f43 100644
--- a/scripts/model.py
+++ b/scripts/model.py
@@ -1,3 +1,5 @@
+import argparse
+
 import pandas as pd
 import numpy as np
 from sklearn.metrics import mean_squared_error
@@ -111,6 +113,21 @@ def predict(row, model):
 
 
 if __name__ == '__main__':
+    # * Model parameters
+    parser = argparse.ArgumentParser(description="Script performing logistic regression model training",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        "-e", "--epochs", type=int, default=100, help="Number of epochs the model will be trained for")
+    parser.add_argument("--save", action="store_true",
+                        help="Save trained model to file './data/model_scripted.pt'")
+
+    args = vars(parser.parse_args())
+
+    epochs = args['epochs']
+    save_model = args['save']
+    print(
+        f"Your model will be trained for {epochs} epochs. Trained model will {'' if save_model else 'not '}be saved.")
+
     # * Paths to data
     avocado_train = './data/avocado.data.train'
     avocado_valid = './data/avocado.data.valid'
@@ -135,7 +152,7 @@ if __name__ == '__main__':
 
     # * Train model
     print("Let's start the training, mate!")
-    train_model(train_dl, model)
+    train_model(train_dl, model, int(epochs))
 
     # * Evaluate model
     mse = evaluate_model(validate_dl, model)
@@ -144,5 +161,12 @@ if __name__ == '__main__':
     # * Prediction
     predictions = [(predict(row, model)[0], row[1].item()) for row in test_dl]
     preds_df = pd.DataFrame(predictions, columns=["Prediction", "Target"])
-    print("\nNow predictions - hey ho, let's go!\n", preds_df.head())
+    print("\nNow predictions - hey ho, let's go!\n",
+          preds_df.head(), "\n\n...let's save them\ndum...\ndum...\ndum dum dum...\n\tDUM\n")
     preds_df.to_csv("./data/predictions.csv", index=False)
+
+    # * Save the trained model
+    if save_model:
+        scripted_model = torch.jit.script(model)
+        scripted_model.save('./data/model_scripted.pt')
+        print("Your model has been saved - have a nice day!")