wip

2022-05-03 15:30:24 +02:00 · 2022-05-03 15:30:24 +02:00 · 954bb281a1
commit 954bb281a1
parent bbfd0dfe1d
3 changed files with 20 additions and 23 deletions
--- a/6
+++ b/6
@@ -12,4 +12,10 @@ RUN apt-get install zip unzip --yes
 WORKDIR /app

 COPY ./deepl.py .
+
+COPY ./stare_zadania/process_data.sh .
+COPY ./stare_zadania/download_data_and_process.py .
+COPY ./stats.py .
+COPY ./stare_zadania/real-or-fake-fake-jobposting-prediction.zip .
+
 CMD python3 deepl.py
--- a/26
+++ b/26
@@ -6,20 +6,9 @@ pipeline {
 	}
    parameters {
        string (
-            defaultValue: 'mikolajpokrywka',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password(
-            defaultValue: '',
-            description: 'Kaggle token',
-            name: 'KAGGLE_KEY'
-        )
-        string (
-            defaultValue: '17000',
-            description: 'cut data',
-            name: 'CUTOFF',
+            defaultValue: '10',
+            description: 'Epochs number',
+            name: 'EPOCH',
            trim: false
        )
    }
@@ -31,11 +20,10 @@ pipeline {
        }
        stage('bash script') {
            steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                         "KAGGLE_KEY=${params.KAGGLE_KEY}",
-                         "CUTOFF=${params.CUTOFF}"]) {
-                            sh 'python3 ./download_data_and_process.py'
-                            archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
+                withEnv(["EPOCH=${params.EPOCH}"]) {  // export the EPOCH build parameter (CUTOFF no longer exists)
+                            copyArtifacts filter: '*', projectName: 's444463-create-dataset'
+                            sh 'python3 ./deepl.py "$EPOCH"'  // single quotes: the shell, not Groovy, expands $EPOCH
+                            archiveArtifacts artifacts: "model"
                 }
            }
        }
--- a/deepl.py
+++ b/deepl.py
@@ -9,6 +9,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from torch import nn
 from torch import optim
 import matplotlib.pyplot as plt
+import sys
+


 def convert_text_to_model_form(text):
@@ -18,9 +20,11 @@ def convert_text_to_model_form(text):


 if __name__ == "__main__":
-    kaggle.api.authenticate()
-    kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
-                                      unzip=True)
+    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 50  # fall back to the former hard-coded 50 when run without arguments
+
+    # kaggle.api.authenticate()
+    # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
+    #                                   unzip=True)

    data = pd.read_csv('fake_job_postings.csv', engine='python')
    # data = data.replace(np.nan, '', regex=True)
@@ -79,7 +83,6 @@ if __name__ == "__main__":
    test_losses = []
    test_accuracies = []

-    epochs = 50
    for e in range(epochs):
        optimizer.zero_grad()