update

2023-04-19 18:47:42 +02:00 · 2023-04-19 18:47:42 +02:00 · e3e995ea3f
commit e3e995ea3f
parent 9857b4339b
5 changed files with 84 additions and 3 deletions
--- a/5
+++ b/5
@ -0,0 +1,5 @@
+# Pinned base image so rebuilds are reproducible (":latest" drifts over time).
+FROM python:3.11
+
+# The former "apt-get install -y" named no packages, so it was dropped.
+RUN pip install --no-cache-dir pandas==2.0.0
--- a/4
+++ b/4
@ -1,7 +1,6 @@
 pipeline {
   agent  any

-   //Definijuemy parametry, które będzie można podać podczas wywoływania zadania
         parameters{
             string(
                 defaultValue: '500',
@ -13,7 +12,6 @@ pipeline {
    stages {
      stage('clear_all') {
         steps {
-            //Wypisz wartość parametru w konsoli (To nie jest polecenie bash, tylko groovy!)
            sh 'rm -rf *'
         }
      }
@ -21,7 +19,6 @@ pipeline {
      stage('Build') {
         steps {
            sh 'git clone https://git.wmi.amu.edu.pl/s444439/ium_z444439'
-         // Run the maven build
            sh 'curl -O https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
            sh 'sed -i "1i\\age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income" adult.data'
            sh 'mv adult.data adult.csv'
@ -41,6 +38,7 @@ agent {
      steps {
               sh 'ls -a'
               sh 'python ./ium_z444439/create-dataset.py'
+               echo 'process finish'
               archiveArtifacts 'X_test.csv'
               archiveArtifacts 'X_dev.csv'
               archiveArtifacts 'X_train.csv'
--- a/50
+++ b/50
@ -0,0 +1,50 @@
+// Copies the dataset splits archived by z-s444439-create-dataset and computes statistics for them.
+pipeline {
+   agent any
+         parameters{
+            buildSelector(
+               defaultSelector: lastSuccessful(),
+               description: 'Which build to use for copying artifacts',
+               name: 'BUILD_SELECTOR'
+            )}
+    stages {
+      stage('clear_all') {
+         steps {
+            sh 'rm -rf ium_z444439'
+         }
+      }
+      stage('checkout') {
+         steps {
+            sh 'git clone https://git.wmi.amu.edu.pl/s444439/ium_z444439'
+         }
+      }
+      stage('copy_artifacts') {
+         steps {
+            // Take the files from the build chosen through the BUILD_SELECTOR parameter.
+            copyArtifacts filter: 'X_test.csv,X_dev.csv,X_train.csv', fingerprintArtifacts: true, projectName: 'z-s444439-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+         }
+      }
+      stage('Docker') {
+         agent { 
+            dockerfile {
+               filename 'Dockerfile_sec'
+               dir      'ium_z444439'
+               reuseNode true
+            }
+         }
+         steps {
+               sh 'ls -a'
+               sh 'python ./ium_z444439/stats.py'
+               echo 'process finish'
+               archiveArtifacts 'X_test.csv,X_dev.csv,X_train.csv,X_dev_stats.csv,X_train_stats.csv,X_test_stats.csv'
+               echo 'finish'
+         }
+      }
+      stage('Goodbye!') {
+         steps {
+            // NOTE(review): no visible step creates dataset.csv in the workspace root - confirm it exists.
+            archiveArtifacts 'dataset.csv'
+         }
+      }
+   }
+}
--- a/create-dataset.py
+++ b/create-dataset.py
@ -0,0 +1,18 @@
+import pandas
+import os
+from sklearn.model_selection import train_test_split
+
+CUTOFF = int(os.environ['CUTOFF'])
+adults = pandas.read_csv('./ium_z444439/adult.csv', engine='python', encoding='ISO-8859-1', sep=',')
+adults = adults.dropna()
+
+adults = adults.sample(CUTOFF)
+
+X, Y = adults, adults
+
+X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, random_state=1)
+X_dev, X_test, Y_dev, Y_test = train_test_split(X_temp, Y_temp, test_size=0.3, random_state=1)
+
+X_train.to_csv('X_train.csv', index=False)
+X_dev.to_csv('X_dev.csv', index=False)
+X_test.to_csv('X_test.csv', index=False)
--- a/stats.py
+++ b/stats.py
@ -0,0 +1,10 @@
+import pandas
+
+X_dev = pandas.read_csv('X_dev.csv', engine='python', encoding='ISO-8859-1', sep=',')
+X_train = pandas.read_csv('X_train.csv', engine='python', encoding='ISO-8859-1', sep=',')
+
+X_test = pandas.read_csv('X_test.csv', engine='python', encoding='ISO-8859-1', sep=',')
+
+X_dev.describe(include='all').to_csv('X_dev_stats.csv', index=True)
+X_train.describe(include='all').to_csv('X_train_stats.csv', index=True)
+X_test.describe(include='all').to_csv('X_test_stats.csv', index=True)