From f7c9671206ac530820a83efbbea1d5933ccdfa63 Mon Sep 17 00:00:00 2001
From: Wirusik <wirus006@gmail.com>
Date: Sun, 8 May 2022 17:12:22 +0200
Subject: [PATCH] only dockerfile true

---
 Dockerfile           | 20 ++++++++++----------
 Jenkinsfile          | 35 +++++++++++++++++++++--------------
 Jenkinsfile-training | 38 +++++++++++++++++++++-----------------
 init.py              | 26 ++++++--------------------
 neutral_network.py   |  4 ++--
 5 files changed, 60 insertions(+), 63 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b7c6426..54624d5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,6 +2,7 @@
 FROM ubuntu:latest
 
 # Install required dependencies
+ENV PATH="$PATH:/root/.local/bin"
 RUN apt update
 RUN apt-get update
 RUN apt install -y figlet
@@ -13,19 +14,18 @@ RUN pip3 install pandas
 RUN pip3 install pillow --global-option="build_ext" --global-option="--disable-zlib" --global-option="--disable-jpeg"
 RUN pip3 install scikit-learn
 RUN pip3 install matplotlib
+RUN pip3 install torchvision
+
+# Args
+ARG KAGGLE_USERNAME
+ARG KAGGLE_KEY
+ENV IS_DOCKER=True
 
 # Create app directory in image
 WORKDIR /app
 
+# Copy the build context (the Jenkins workspace checkout) into /app
 COPY . .
-ARG KAGGLE_USERNAME
-ARG KAGGLE_KEY
 
-# Download kaggle dataset
-RUN kaggle datasets download -d hakeem/atp-and-wta-tennis-data
-RUN unzip -o atp-and-wta-tennis-data.zip
-
-# Script executed after docker run
-RUN python3 ./init.py
-RUN chmod a+rwx -R *
-RUN ls -la
\ No newline at end of file
+# Create a kaggle config directory (for authentication) writable by non-root users
+RUN mkdir /.kaggle/ && chmod o+w /.kaggle
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 8689984..83b362b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,4 +1,10 @@
-pipeline {    
+pipeline {   
+    agent {
+        dockerfile {
+            additionalBuildArgs '-t ium'
+            args "-e KAGGLE_USERNAME=${params.KAGGLE_USERNAME} -e KAGGLE_KEY=${params.KAGGLE_KEY}"
+        }
+    }
     parameters {
         string (
             defaultValue: 'wirus006',
@@ -12,22 +18,23 @@ pipeline {
             name: 'KAGGLE_KEY'
         )
     }
-    agent {
-        dockerfile {
-            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s444498-create-dataset"
-        }
+    options {
+        copyArtifactPermission('s444498-training');
     }
     stages {
-        stage('Archive dataset') {
+        stage('Init datasets') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh 'echo hello world | figlet'
-                    sh 'chmod a+rwx -R *'
-                    sh 'pwd && ls'
-                    sh 'ls /app/data/'
-                    archiveArtifacts artifacts: '/app/data/*', onlyIfSuccessful: true
-                }
+                sh 'python3 init.py'
+            }
+        }
+        stage('Archive datasets') {
+            steps {
+                archiveArtifacts artifacts: 'atp_test.csv, atp_train.csv', onlyIfSuccessful: true
+            }
+        }
+        stage('Run training job') {
+            steps {
+                build job: "s444498-training/master"
             }
         }
     }
diff --git a/Jenkinsfile-training b/Jenkinsfile-training
index 0fd7f96..f8cf5dc 100644
--- a/Jenkinsfile-training
+++ b/Jenkinsfile-training
@@ -1,4 +1,8 @@
 pipeline {
+    agent {
+        dockerfile true
+    }
+
     parameters {
         string(
             defaultValue: '64',
@@ -12,47 +16,47 @@ pipeline {
             name: 'EPOCHS',
             trim: true
         )
-        gitParameter branchFilter: 'origin/(.*)', defaultValue: 'main', name: 'BRANCH', type: 'PT_BRANCH'
         buildSelector(
 			defaultSelector: lastSuccessful(),
 			description: 'Which build to use for copying artifacts',
 			name: 'BUILD_SELECTOR'
 		)
     }
-
-    agent {
-        docker {
-            image 's444498-create-dataset'
-        }
-    }
     
     stages {
+        stage('Copy artifacts') {
+            steps {
+                copyArtifacts fingerprintArtifacts: true, projectName: 's444498-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+            }
+        }
         stage('Train model') {
             steps {
-                sh "python neutral_network.py -e ${params.EPOCHS} -b ${params.BATCHSIZE}"
+                sh "chmod u+x ./neutral_network.py"
+                sh 'python3 neutral_network.py -e "$EPOCHS" -b "$BATCHSIZE"'
+            }
+        }
+        stage('Archive model') {
+            steps {
+                archiveArtifacts artifacts: "model.zip", onlyIfSuccessful: true
             }
         }
     }
 
-    environment {
-        NOTIFICATION_ADDRESS = 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
-    }
-
     post {
         success {
-            emailext body: 'SUCCESS', subject: "${env.JOB_NAME}", to: "${env.NOTIFICATION_ADDRESS}"
+            emailext body: "SUCCESS", subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
         }
 
         failure {
-            emailext body: 'FAILURE', subject: "${env.JOB_NAME}", to: "${env.NOTIFICATION_ADDRESS}"
+            emailext body: "FAILURE", subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
         }
 
         unstable {
-            emailext body: 'UNSTABLE', subject: "${env.JOB_NAME}", to: "${env.NOTIFICATION_ADDRESS}"
+            emailext body: 'UNSTABLE', subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
         }
 
         changed {
-            emailext body: 'CHANGED', subject: "${env.JOB_NAME}", to: "${env.NOTIFICATION_ADDRESS}"
-        }	
+            emailext body: 'CHANGED', subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
+        }		
     }
 }
\ No newline at end of file
diff --git a/init.py b/init.py
index 3f1fe4b..3743179 100644
--- a/init.py
+++ b/init.py
@@ -7,41 +7,36 @@ import matplotlib
 from pathlib import Path
 
 # Inicjalizacja danych
-
+file_exists = exists('./df_atp.csv')
+if not file_exists:
+    subprocess.run(["kaggle", "datasets", "download", "-d", "hakeem/atp-and-wta-tennis-data"], check=True)
+    subprocess.run(["unzip", "-o", "atp-and-wta-tennis-data.zip"], check=True)
 atp_data = pd.read_csv('df_atp.csv')
 print(atp_data)
 
 # Średnia ilość gemów w pierwszym secie zwycięzców meczu
-
 print(atp_data[["Winner", "W1"]].mean())
 
 # Minimalna ilość wygranych gemów w pierwszym secie osób wygrywających mecz
-
 print(atp_data[["Winner", "W1"]].min())
 
 # Maksymalna ilość wygranych gemów w pierwszym secie osób wygrywających mecz
-
 print(atp_data[["Winner", "W1"]].max())
 
 # Odchylenie standardowe wygranych gemów w pierwszym secie osób wygrywających mecz
-
 print(atp_data[["Winner", "W1"]].std())
 
 # Mediana wygranych gemów w pierwszym secie osób wygrywających mecz
-
 print(atp_data[["Winner", "W1"]].median())
 
 # Zmiana nazwy nienazwanej kolumny
-
 atp_data.rename(columns={'Unnamed: 0':'ID'}, inplace=True)
 
 # Jak często kto był zwycięzcą
-
 print(atp_data.groupby("Winner")["ID"].nunique())
 
 # Normalizacja rund -1: Finał, -2: Półfinał, -3: Ćwiartka, -4: Każdy z każdym
 # 1: pierwsza runda, 2: druga runda, 3: trzecia runda, 4: czwarta runda
-
 atp_data.loc[atp_data["Round"] == 'The Final', "Round"] = -1
 atp_data.loc[atp_data["Round"] == 'Semifinals', "Round"] = -2
 atp_data.loc[atp_data["Round"] == 'Quarterfinals', "Round"] = -3
@@ -53,28 +48,19 @@ atp_data.loc[atp_data["Round"] == '4th Round', "Round"] = 4
 print(atp_data["Round"])
 
 # Czyszczenie: W polu z datą zamienimy ######## na pustego stringa
-
 atp_data.loc[atp_data["Date"] == '########', "Date"] = ''
 print(atp_data["Date"])
 
 # Podział na podzbiory: trenujący, testowy, walidujący w proporcjach 6:2:2
-
 atp_train, atp_test = train_test_split(atp_data, test_size=0.4, random_state=1)
 atp_dev, atp_test = train_test_split(atp_test, test_size=0.5, random_state=1)
 
 # Wielkość zbioru i podzbiorów
-
 print("\nElements of total set: " + str(len(atp_data)))
 print("\nElements of test set: " + str(len(atp_test)))
 print("\nElements of dev set: " + str(len(atp_dev)))
 print("\nElements of train set: " + str(len(atp_train)))
 
 # Stworzenie plików z danymi trenującymi i testowymi
-
-filepath1 = Path('data/atp_test.csv')  
-filepath2 = Path('data/atp_train.csv')  
-filepath1.parent.mkdir(parents=True, exist_ok=True)  
-filepath2.parent.mkdir(parents=True, exist_ok=True)  
-
-atp_test.to_csv(filepath1)  
-atp_train.to_csv(filepath2)
\ No newline at end of file
+atp_test.to_csv('atp_test.csv', encoding="utf-8", index=False)  
+atp_train.to_csv('atp_train.csv', encoding="utf-8", index=False)
\ No newline at end of file
diff --git a/neutral_network.py b/neutral_network.py
index 65961d3..1b269bb 100644
--- a/neutral_network.py
+++ b/neutral_network.py
@@ -87,8 +87,8 @@ print(f"Using {device} device")
 args = setup_args()
 batch_size = args.batchSize
 
-plant_test = AtpDataset('data/atp_test.csv')
-plant_train = AtpDataset('data/atp_train.csv')
+plant_test = AtpDataset('atp_test.csv')
+plant_train = AtpDataset('atp_train.csv')
 
 train_dataloader = DataLoader(plant_train, batch_size=batch_size)
 test_dataloader = DataLoader(plant_test, batch_size=batch_size)