From 954bb281a1a30340dec10d24bfcfe118f3da4a15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Pokrywka?= <mp7961@gmail.com>
Date: Tue, 3 May 2022 15:30:24 +0200
Subject: [PATCH] Train deepl.py in Jenkins with a configurable epoch count

---
 Dockerfile  |  6 ++++++
 Jenkinsfile | 26 +++++++-------------------
 deepl.py    | 11 +++++++----
 3 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 7f68e74..8af5af7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,4 +12,10 @@ RUN apt-get install zip unzip --yes
 WORKDIR /app
 
 COPY ./deepl.py .
+
+COPY ./stare_zadania/process_data.sh .
+COPY ./stare_zadania/download_data_and_process.py .
+COPY ./stats.py .
+COPY ./stare_zadania/real-or-fake-fake-jobposting-prediction.zip .
+
 CMD python3 deepl.py
diff --git a/Jenkinsfile b/Jenkinsfile
index 789ccc7..111f343 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -6,20 +6,9 @@ pipeline {
 	}
     parameters {
         string (
-            defaultValue: 'mikolajpokrywka',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password(
-            defaultValue: '',
-            description: 'Kaggle token',
-            name: 'KAGGLE_KEY'
-        )
-        string (
-            defaultValue: '17000',
-            description: 'cut data',
-            name: 'CUTOFF',
+            defaultValue: '10',
+            description: 'Epochs number',
+            name: 'EPOCH',
             trim: false
         )
     }
@@ -31,11 +20,10 @@ pipeline {
         }
         stage('bash script') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                         "KAGGLE_KEY=${params.KAGGLE_KEY}",
-                         "CUTOFF=${params.CUTOFF}"]) {
-                            sh 'python3 ./download_data_and_process.py'
-                            archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
+                withEnv(["EPOCH=${params.EPOCH}"]) { // export the EPOCH build parameter to the shell step
+                            copyArtifacts filter: '*', projectName: 's444463-create-dataset'
+                            sh 'python3 ./deepl.py $EPOCH' // single quotes: the shell, not Groovy, expands $EPOCH
+                            archiveArtifacts artifacts: "model"
                 }
             }
         }
diff --git a/deepl.py b/deepl.py
index b5d3fb2..c908806 100644
--- a/deepl.py
+++ b/deepl.py
@@ -9,6 +9,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from torch import nn
 from torch import optim
 import matplotlib.pyplot as plt
+import sys
+
 
 
 def convert_text_to_model_form(text):
@@ -18,9 +20,11 @@ def convert_text_to_model_form(text):
 
 
 if __name__ == "__main__":
-    kaggle.api.authenticate()
-    kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
-                                      unzip=True)
+    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 50  # no CLI arg (e.g. Dockerfile CMD) -> former default of 50
+
+    # kaggle.api.authenticate()
+    # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
+    #                                   unzip=True)
 
     data = pd.read_csv('fake_job_postings.csv', engine='python')
     # data = data.replace(np.nan, '', regex=True)
@@ -79,7 +83,6 @@ if __name__ == "__main__":
     test_losses = []
     test_accuracies = []
 
-    epochs = 50
     for e in range(epochs):
         optimizer.zero_grad()