Preprocessing python

2021-05-16 21:06:46 +02:00 · 2021-05-16 21:06:46 +02:00 · 19bf9c3fe0
commit 19bf9c3fe0
parent 6bba24a4b9
3 changed files with 42 additions and 6 deletions
--- a/2
+++ b/2
@ -15,7 +15,7 @@ RUN pip3 install sklearn
 WORKDIR /app
 COPY ./preparations.sh ./
-COPY ./preprocesing_python.py ./
+COPY ./preprocesing.py ./
 COPY ./training.py ./
 # CMD ./preparations.sh
--- a/10
+++ b/10
@ -21,18 +21,18 @@ pipeline {
                script {
                    def image = docker.build('dock')
                    image.inside{
-                        sh 'chmod +x preparations.sh'
+                        sh 'chmod +x preprocesing.py'
                        sh 'echo ${CUTOFF}'
-                        sh './preparations.sh ${CUTOFF}'
+                        sh 'python3 preprocesing.py ${CUTOFF}'  // file name must match the script COPY'd into the image (preprocesing.py)
                    }
                }
            }
        }
        stage('archiveArtifacts'){
            steps{
-                archiveArtifacts 'data.dev'
+                archiveArtifacts 'test.csv'
-                archiveArtifacts 'data.train'
+                archiveArtifacts 'validate.csv'
-                archiveArtifacts 'data.test'
+                archiveArtifacts 'train.csv'
            }
        }
    }
--- a/preprocesing.py
+++ b/preprocesing.py
@ -0,0 +1,36 @@
 import sys
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 # Input dataset read from the current working directory.
 INPUT_CSV = 'who_suicide_statistics.csv'
 
 
 def load_and_encode(path):
     """Read the CSV at *path*, drop incomplete rows and one-hot encode it.
 
     The 'age', 'sex' and 'country' columns are replaced by indicator
     columns named after the raw category values (no prefix).
     """
     frame = pd.read_csv(path)
     # Remove rows with missing data.
     frame = frame.dropna()
     # Categorical encoding.
     return pd.get_dummies(
         frame, columns=['age', 'sex', 'country'], prefix='', prefix_sep='')
 
 
 def split_dataset(frame, seed=42):
     """Shuffle *frame* and split it 60/20/20 into (train, validate, test).
 
     Uses positional slicing instead of ``np.split``: calling ``np.split``
     on a DataFrame goes through ``DataFrame.swapaxes``, which is
     deprecated in recent pandas releases. The resulting partitions are
     the same row ranges of the shuffled frame.
     """
     shuffled = frame.sample(frac=1, random_state=seed)
     n_rows = len(frame)
     train_end = int(.6 * n_rows)
     validate_end = int(.8 * n_rows)
     return (shuffled.iloc[:train_end],
             shuffled.iloc[train_end:validate_end],
             shuffled.iloc[validate_end:])
 
 
 def main(argv):
     """Run the preprocessing: ``argv[1]`` is CUTOFF, the number of rows kept.
 
     Writes train.csv, validate.csv and test.csv to the working directory
     and prints the three partitions.
     """
     if len(argv) < 2:
         # Fail with a readable message instead of a bare IndexError.
         raise SystemExit('usage: python3 preprocesing.py CUTOFF')
     cutoff = int(argv[1])
 
     # CUTOFF: keep only the first `cutoff` rows of the encoded data.
     dataset = load_and_encode(INPUT_CSV).head(cutoff)
 
     # Split into train, validate and test.
     train, validate, test = split_dataset(dataset)
 
     # Save to files.
     train.to_csv('train.csv')
     validate.to_csv('validate.csv')
     test.to_csv('test.csv')
 
     print(train)
     print(validate)
     print(test)
 
 
 if __name__ == '__main__':
     main(sys.argv)