diff --git a/.gitignore b/.gitignore
index 9c9209f..975351a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
-df_atp.csv
-df_wta.csv
-atp-and-wta-tennis-data.zip
-data
-model.zip
-secret.txt
+*.csv
+*.zip
+*.png
+*.txt
+__pycache__
diff --git a/Dockerfile b/Dockerfile
index 54624d5..c2f5a17 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,9 +9,10 @@ RUN apt install -y figlet
 RUN export PATH=”$PATH:/usr/local/bin/python”
 RUN apt install python3-pip -y
 RUN apt install unzip -y
+RUN pip3 install numpy
 RUN pip3 install kaggle
 RUN pip3 install pandas
-RUN pip3 install pillow --global-option="build_ext" --global-option="--disable-zlib" --global-option="--disable-jpeg"
+RUN pip3 install pillow
 RUN pip3 install scikit-learn
 RUN pip3 install matplotlib
 RUN pip3 install torchvision
diff --git a/Jenkinsfile b/Jenkinsfile
index 83b362b..528e3aa 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,7 +1,7 @@
 pipeline {
     agent {
         dockerfile {
-            additionalBuildArgs '-t ium'
+            additionalBuildArgs '-t s444498-init-datasets'
             args '-e KAGGLE_USERNAME=${params.KAGGLE_USERNAME} -e KAGGLE_KEY=${params.KAGGLE_KEY}'
         }
     }
@@ -29,7 +29,7 @@ pipeline {
         }
         stage('Archive datasets') {
             steps {
-                archiveArtifacts artifacts: 'atp_test.csv, atp_train.csv', onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'atp_test.csv, atp_dev.csv, atp_train.csv', onlyIfSuccessful: true
             }
         }
         stage('Run training job') {
diff --git a/Jenkinsfile-evaluation b/Jenkinsfile-evaluation
new file mode 100644
--- /dev/null
+++ b/Jenkinsfile-evaluation
@@ -0,0 +1,49 @@
+pipeline {
+    agent {
+        dockerfile true
+    }
+
+    parameters {
+        gitParameter branchFilter: 'origin/(.*)', defaultValue: 'master', name: 'BRANCH', type: 'PT_BRANCH'
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR'
+        )
+    }
+
+    stages {
+        stage('Copy artifacts') {
+            steps {
+                copyArtifacts fingerprintArtifacts: true, projectName: 's444498-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+                copyArtifacts fingerprintArtifacts: true, projectName: 's444498-training/${BRANCH}', selector: buildParameter('BUILD_SELECTOR')
+                copyArtifacts filter: "eval_result.txt", projectName: 's444498-evaluation/${BRANCH}', optional: true
+            }
+        }
+        stage("Evaluation") {
+            steps {
+                sh "chmod u+x ./evaluation.py"
+                sh "python3 ./evaluation.py"
+                archiveArtifacts artifacts: "eval_result.txt, plot.png", onlyIfSuccessful: true
+            }
+        }
+    }
+
+    post {
+        success {
+            emailext body: "SUCCESS", subject: "s444498-evaluation", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
+        }
+
+        failure {
+            emailext body: "FAILURE", subject: "s444498-evaluation", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
+        }
+
+        unstable {
+            emailext body: 'UNSTABLE', subject: "s444498-evaluation", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
+        }
+
+        changed {
+            emailext body: 'CHANGED', subject: "s444498-evaluation", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
+        }
+    }
+}
diff --git a/Jenkinsfile-training b/Jenkinsfile-training
index f8cf5dc..6de7b0a 100644
--- a/Jenkinsfile-training
+++ b/Jenkinsfile-training
@@ -45,6 +45,7 @@ pipeline {
     post {
         success {
             emailext body: "SUCCESS", subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
+            build job: "s444498-evaluation/master"
         }
 
         failure {
diff --git a/evaluation.py b/evaluation.py
new file mode 100644
--- /dev/null
+++ b/evaluation.py
@@ -0,0 +1,55 @@
+"""Evaluate the trained model on the dev set and plot the loss per build."""
+import matplotlib.pyplot as plt
+import torch
+from torch.utils.data import DataLoader
+from neutral_network import MLP, AtpDataset, test
+
+RESULTS_PATH = 'eval_result.txt'
+
+
+def load_model():
+    """Create an MLP and load its state dict from ./model.zip."""
+    model = MLP()
+    model.load_state_dict(torch.load('./model.zip'))
+    return model
+
+
+def load_dev_dataset(batch_size=64):
+    """Return a DataLoader over the atp_dev.csv dataset."""
+    atp_dev = AtpDataset('atp_dev.csv')
+    return DataLoader(atp_dev, batch_size=batch_size)
+
+
+def make_plot(values):
+    """Plot values against 1-based build numbers and save plot.png."""
+    build_nums = list(range(1, len(values) + 1))
+    plt.xlabel('Build number')
+    plt.ylabel('MSE Loss')
+    plt.plot(build_nums, values, label='Model MSE Loss over builds')
+    plt.legend()
+    plt.savefig('plot.png')
+
+
+def read_history(path=RESULTS_PATH):
+    """Return the losses stored one per line in path, skipping blank lines."""
+    with open(path, 'r') as f:
+        # Lines read from a file keep their newline, so a bare `if line`
+        # never filters anything; strip first so float() never sees a blank.
+        return [float(line) for line in f if line.strip()]
+
+
+def main():
+    """Evaluate the model, append the loss to the history and redraw the plot."""
+    model = load_model()
+    dataloader = load_dev_dataset()
+    loss_fn = torch.nn.MSELoss()
+
+    loss = test(dataloader, model, loss_fn)
+    # Append so the file accumulates one loss value per build.
+    with open(RESULTS_PATH, 'a+') as f:
+        f.write(f'{loss}\n')
+    make_plot(read_history())
+
+
+if __name__ == '__main__':
+    main()
diff --git a/init.py b/init.py
index 3743179..0537a46 100644
--- a/init.py
+++ b/init.py
@@ -62,5 +62,6 @@ print("\nElements of dev set: " + str(len(atp_dev)))
 print("\nElements of train set: " + str(len(atp_train)))
 
 # Stworzenie plików z danymi trenującymi i testowymi
-atp_test.to_csv('atp_test.csv', encoding="utf-8", index=False)
+atp_test.to_csv('atp_test.csv', encoding="utf-8", index=False)
+atp_dev.to_csv('atp_dev.csv', encoding="utf-8", index=False)
 atp_train.to_csv('atp_train.csv', encoding="utf-8", index=False)
\ No newline at end of file