From c891fcf53fde3ce5c929e11f9eebad6317a3d8e6 Mon Sep 17 00:00:00 2001 From: s444439 Date: Wed, 19 Apr 2023 19:46:29 +0200 Subject: [PATCH] update --- Jenkinsfile | 29 ++++++++++++++--------------- Jenkinsfile_stats | 32 +++++++++++++++----------------- create-dataset.py | 15 ++++++++------- stats.py | 12 ++++++------ 4 files changed, 43 insertions(+), 45 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0fb390e..07cf64d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,5 @@ pipeline { agent any - parameters{ string( defaultValue: '500', @@ -9,8 +8,8 @@ pipeline { trim: false ) } - stages { - stage('clear_all') { + stages { + stage('clear') { steps { sh 'rm -rf *' } @@ -28,21 +27,21 @@ pipeline { } } stage('Docker') { -agent { - dockerfile { - filename 'Dockerfile' - dir 'ium_z444439' - reuseNode true - } -} - steps { + agent { + dockerfile { + filename 'Dockerfile' + dir 'ium_z444439' + reuseNode true + } + } + steps { sh 'ls -a' sh 'python ./ium_z444439/create-dataset.py' echo 'process finish' - archiveArtifacts 'X_test.csv' - archiveArtifacts 'X_dev.csv' - archiveArtifacts 'X_train.csv' - } + archiveArtifacts 'adult_test.csv' + archiveArtifacts 'adult_dev.csv' + archiveArtifacts 'adult_train.csv' + } } } } \ No newline at end of file diff --git a/Jenkinsfile_stats b/Jenkinsfile_stats index 3131774..5835279 100644 --- a/Jenkinsfile_stats +++ b/Jenkinsfile_stats @@ -1,15 +1,15 @@ pipeline { - agent any - parameters{ - choice( - choices: ['lastSuccessful()', 'lastCompleted()', 'latestSavedBuild()'], - description: 'Which build to use for copying artifacts', - name: 'BUILD_SELECTOR' - )} + agent any + parameters{ + choice( + choices: ['lastSuccessful()', 'lastCompleted()', 'latestSavedBuild()'], + description: 'Which build to use for copying artifacts', + name: 'BUILD_SELECTOR' + )} stages { - stage('clear_all') { + stage('clear') { steps { - sh 'rm -rf ium_z444439' + sh 'rm -rf *' } } stage('checkout') { @@ -19,13 +19,13 @@ pipeline { } stage('copy_artifacts') { steps { - copyArtifacts filter: 'X_test.csv,X_dev.csv,X_train.csv', fingerprintArtifacts: true, projectName: 'z-s444439-create-dataset', selector: workspace() + copyArtifacts filter: 'adult_test.csv,adult_dev.csv,adult_train.csv', fingerprintArtifacts: true, projectName: 'z-s444439-create-dataset', selector: workspace() } } stage('Docker') { agent { dockerfile { - filename 'Dockerfile_sec' + filename 'Dockerfile' dir 'ium_z444439' reuseNode true } @@ -34,16 +34,14 @@ pipeline { sh 'ls -a' sh 'python ./ium_z444439/stats.py' echo 'process finish' - archiveArtifacts 'X_test.csv' - archiveArtifacts 'X_dev.csv' - archiveArtifacts 'X_train.csv' - echo 'finish' + archiveArtifacts 'adult_test_stats.csv' + archiveArtifacts 'adult_dev_stats.csv' + archiveArtifacts 'adult_train_stats.csv' } } stage('Goodbye!') { steps { - archiveArtifacts 'dataset.csv' - + sh 'rm -rf *' } } } diff --git a/create-dataset.py b/create-dataset.py index d477753..c761073 100644 --- a/create-dataset.py +++ b/create-dataset.py @@ -8,11 +8,12 @@ adults = adults.dropna() adults = adults.sample(CUTOFF) -X, Y = adults, adults +adult_X, adult_Y = adults, adults +adult_X_train, adult_X_temp, adult_Y_train, adult_Y_temp = train_test_split(adult_X, adult_Y, test_size=0.3, + random_state=1) +adult_X_dev, adult_X_test, adult_Y_dev, adult_Y_test = train_test_split(adult_X_temp, adult_Y_temp, test_size=0.3, + random_state=1) -X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, random_state=1) -X_dev, X_test, Y_dev, Y_test = train_test_split(X_temp, Y_temp, test_size=0.3, random_state=1) - -X_train.to_csv('X_train.csv', index=False) -X_dev.to_csv('X_dev.csv', index=False) -X_test.to_csv('X_test.csv', index=False) +adult_X_train.to_csv('adult_train.csv', index=False) +adult_X_dev.to_csv('adult_dev.csv', index=False) +adult_X_test.to_csv('adult_test.csv', index=False) diff --git a/stats.py b/stats.py index ec3f479..ebaf5c6 100644 --- a/stats.py +++ b/stats.py @@ -1,10 +1,10 @@ import pandas -X_dev = pandas.read_csv('X_dev.csv', engine='python', encoding='ISO-8859-1', sep=',') -X_train = pandas.read_csv('X_train.csv', engine='python', encoding='ISO-8859-1', sep=',') +adult_dev = pandas.read_csv('adult_dev.csv', engine='python', encoding='ISO-8859-1', sep=',') +adult_train = pandas.read_csv('adult_train.csv', engine='python', encoding='ISO-8859-1', sep=',') -X_test = pandas.read_csv('X_test.csv', engine='python', encoding='ISO-8859-1', sep=',') +adult_test = pandas.read_csv('adult_test.csv', engine='python', encoding='ISO-8859-1', sep=',') -X_dev.describe(include='all').to_csv('X_dev_stats.csv', index=True) -X_train.describe(include='all').to_csv('X_train_stats.csv', index=True) -X_test.describe(include='all').to_csv('X_test_stats.csv', index=True) +adult_dev.describe(include='all').to_csv('adult_dev_stats.csv', index=True) +adult_train.describe(include='all').to_csv('adult_train_stats.csv', index=True) +adult_test.describe(include='all').to_csv('adult_test_stats.csv', index=True)