Preprocessing python
All checks were successful
s434784-training/pipeline/head This commit looks good
All checks were successful
s434784-training/pipeline/head This commit looks good
This commit is contained in:
parent
6bba24a4b9
commit
19bf9c3fe0
@ -15,7 +15,7 @@ RUN pip3 install sklearn
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY ./preparations.sh ./
|
COPY ./preparations.sh ./
|
||||||
COPY ./preprocesing_python.py ./
|
COPY ./preprocesing.py ./
|
||||||
COPY ./training.py ./
|
COPY ./training.py ./
|
||||||
|
|
||||||
# CMD ./preparations.sh
|
# CMD ./preparations.sh
|
||||||
|
10
Jenkinsfile
vendored
10
Jenkinsfile
vendored
@ -21,18 +21,18 @@ pipeline {
|
|||||||
script {
|
script {
|
||||||
def image = docker.build('dock')
|
def image = docker.build('dock')
|
||||||
image.inside{
|
image.inside{
|
||||||
sh 'chmod +x preparations.sh'
|
sh 'chmod +x preprocesing.py'
|
||||||
sh 'echo ${CUTOFF}'
|
sh 'echo ${CUTOFF}'
|
||||||
sh './preparations.sh ${CUTOFF}'
|
// Run the preprocessing script; the file in the repository is spelled
// "preprocesing.py" (single "s"), matching the Dockerfile COPY and the chmod step.
sh 'python3 preprocesing.py ${CUTOFF}'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('archiveArtifacts'){
|
stage('archiveArtifacts'){
|
||||||
steps{
|
steps{
|
||||||
archiveArtifacts 'data.dev'
|
archiveArtifacts 'test.csv'
|
||||||
archiveArtifacts 'data.train'
|
archiveArtifacts 'validate.csv'
|
||||||
archiveArtifacts 'data.test'
|
archiveArtifacts 'train.csv'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
36
preprocesing.py
Normal file
36
preprocesing.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
import sys
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# Integer encodings for the age-group and sex labels.
# NOTE(review): neither mapping is referenced by this script (the columns are
# one-hot encoded below); kept so the module-level names remain available.
age = {"5-14 years": 0, "15-24 years": 1, "25-34 years": 2,
       "35-54 years": 3, "55-74 years": 4, "75+ years": 5}

sex = {"male": 0, "female": 1}


def preprocess(frame, cutoff):
    """Return the cleaned, one-hot encoded first *cutoff* rows of *frame*.

    Args:
        frame: DataFrame containing at least the 'age', 'sex' and 'country'
            columns.
        cutoff: Maximum number of rows to keep after cleaning.

    Returns:
        A new DataFrame; *frame* itself is not modified.
    """
    # Drop incomplete rows.
    complete = frame.dropna()

    # Categorisation: one-hot encode the categorical columns. The empty
    # prefix/separator makes each new column carry the bare category value.
    encoded = pd.get_dummies(
        complete, columns=['age', 'sex', 'country'], prefix='', prefix_sep='')

    # CUTOFF: keep only the leading rows.
    return encoded.head(cutoff)


def split_train_validate_test(frame):
    """Shuffle *frame* and split it 60% / 20% / 20%.

    The shuffle uses a fixed seed (random_state=42), so the split is
    reproducible for a given input.

    Returns:
        A ``(train, validate, test)`` tuple of DataFrames.
    """
    shuffled = frame.sample(frac=1, random_state=42)
    train_end = int(.6 * len(frame))
    validate_end = int(.8 * len(frame))
    # Plain positional slices instead of np.split: np.split on a DataFrame
    # goes through the deprecated DataFrame.swapaxes.
    return (shuffled.iloc[:train_end],
            shuffled.iloc[train_end:validate_end],
            shuffled.iloc[validate_end:])


def main(argv=None):
    """Read the raw CSV, preprocess it and write the three split files.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            CUTOFF row count.

    Raises:
        SystemExit: If the CUTOFF argument is missing.
        ValueError: If the CUTOFF argument is not an integer.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        raise SystemExit("usage: python3 preprocesing.py CUTOFF")
    cutoff = int(argv[1])

    sc = pd.read_csv('who_suicide_statistics.csv')
    sc = preprocess(sc, cutoff)

    # Split into train, validate and test.
    train, validate, test = split_train_validate_test(sc)

    # Write the splits to files.
    train.to_csv('train.csv')
    validate.to_csv('validate.csv')
    test.to_csv('test.csv')

    print(train)
    print(validate)
    print(test)


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user