diff --git a/Jenkinsfile_create_dataset b/Jenkinsfile_create_dataset
index 8b93923..6994eae 100644
--- a/Jenkinsfile_create_dataset
+++ b/Jenkinsfile_create_dataset
@@ -1,10 +1,8 @@
 pipeline {
     agent any
-
-    //Definijuemy parametry, które będzie można podać podczas wywoływania zadania
     parameters{
         string(
-            defaultValue: 'mikaleta',
+            defaultValue: 'kalkam',
             description: 'Kaggle username',
             name: 'KAGGLE_USERNAME',
             trim: false
@@ -22,7 +20,7 @@ pipeline {
         )
     }
     stages {
-        stage('clear_before') {
+        stage('Clear directory before executing') {
             steps {
                 sh 'rm -rf *'
             }
@@ -34,7 +32,7 @@ pipeline {
             }
         }
 
-        stage('Build') {
+        stage('Download dataset') {
             steps {
                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
                          "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
diff --git a/Jenkinsfile_dataset_stats b/Jenkinsfile_dataset_stats
new file mode 100644
index 0000000..3238b34
--- /dev/null
+++ b/Jenkinsfile_dataset_stats
@@ -0,0 +1,61 @@
+// Computes summary statistics for the dataset splits copied from the
+// 'z-s486867-create-dataset' job.
+pipeline {
+    agent any
+    parameters {
+        // Copy Artifact build selector; read by the copyArtifacts step below
+        // through buildParameter('BUILD_SELECTOR').
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR'
+        )
+    }
+    stages {
+        stage('Clear directory before executing') {
+            steps {
+                sh 'rm -rf *'
+            }
+        }
+        stage('Clone project from repo') {
+            steps {
+                sh 'git clone https://git.wmi.amu.edu.pl/s486867/ium_z486867'
+            }
+        }
+        stage('Copy artifacts from previous build') {
+            steps {
+                // Use the build chosen via BUILD_SELECTOR rather than a
+                // hard-coded selector, so the parameter actually takes effect.
+                copyArtifacts(
+                    filter: 'X_test.csv,X_dev.csv,X_train.csv',
+                    fingerprintArtifacts: true,
+                    projectName: 'z-s486867-create-dataset',
+                    selector: buildParameter('BUILD_SELECTOR')
+                )
+            }
+        }
+        stage('Docker') {
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    dir 'ium_z486867'
+                    reuseNode true
+                }
+            }
+            steps {
+                sh 'python ./ium_z486867/dataset-stats.py'
+                archiveArtifacts 'X_test.csv'
+                archiveArtifacts 'X_dev.csv'
+                archiveArtifacts 'X_train.csv'
+                // dataset-stats.py writes X_<split>_stats.csv files; archive
+                // them here, before the final stage wipes the directory.
+                archiveArtifacts 'X_*_stats.csv'
+            }
+        }
+        stage('Clear directory after executing') {
+            steps {
+                sh 'rm -rf *'
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/dataset-stats.py b/dataset-stats.py
new file mode 100644
index 
0000000..beba31f
--- /dev/null
+++ b/dataset-stats.py
@@ -0,0 +1,19 @@
+"""Write describe(include='all') summaries for the X_train, X_dev and X_test CSV files."""
+import pandas
+
+# Keyword arguments shared by every read_csv call.
+READ_OPTIONS = {'engine': 'python', 'encoding': 'ISO-8859-1', 'sep': ','}
+
+# (input CSV, statistics CSV) for each split, in processing order.
+FILE_PAIRS = (
+    ('X_train.csv', 'X_train_stats.csv'),
+    ('X_dev.csv', 'X_dev_stats.csv'),
+    ('X_test.csv', 'X_test_stats.csv'),
+)
+
+# Read every input first, so nothing is written unless all inputs load.
+loaded = [(pandas.read_csv(source, **READ_OPTIONS), target)
+          for source, target in FILE_PAIRS]
+
+for frame, target in loaded:
+    frame.describe(include='all').to_csv(target, index=True)
\ No newline at end of file