docker
This commit is contained in:
parent
839e258785
commit
0bcff8ff05
9
Dockerfile
Normal file
9
Dockerfile
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Image providing the Python tooling needed to run create.py and stats.py.
# The base image is pinned rather than `latest` so builds are reproducible and
# the system-wide `pip3 install` below keeps working (newer Ubuntu releases
# mark the system Python as externally managed and reject it).
FROM ubuntu:22.04

# One layer: install pip, install the Python packages without keeping pip's
# download cache, then drop the apt package lists to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends python3-pip \
    && pip3 install --no-cache-dir numpy pandas wget scikit-learn \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY ./create.py ./stats.py ./
|
||||||
|
|
23
Jenkinsfile
vendored
23
Jenkinsfile
vendored
@ -1,5 +1,7 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent {
|
||||||
|
dockerfile true
|
||||||
|
}
|
||||||
parameters {
|
parameters {
|
||||||
string (
|
string (
|
||||||
defaultValue: '40',
|
defaultValue: '40',
|
||||||
@ -9,23 +11,22 @@ pipeline {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
stages {
|
stages {
|
||||||
|
stage('Docker'){
|
||||||
|
steps{
|
||||||
|
sh 'python3 ./create.py'
|
||||||
|
}
|
||||||
|
}
|
||||||
stage('checkout: Check out from version control') {
|
stage('checkout: Check out from version control') {
|
||||||
steps {
|
steps {
|
||||||
git 'https://git.wmi.amu.edu.pl/s434766/ium_434766.git'
|
git 'https://git.wmi.amu.edu.pl/s434766/ium_434766.git'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('sh: Shell Script') {
|
|
||||||
steps {
|
|
||||||
sh 'chmod +x script.sh'
|
|
||||||
sh './script.sh ${CUTOFF}'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stage('archiveArtifacts') {
|
stage('archiveArtifacts') {
|
||||||
steps {
|
steps {
|
||||||
archiveArtifacts 'scriptTest.csv'
|
archiveArtifacts 'data_val.csv'
|
||||||
archiveArtifacts 'scriptDev.csv'
|
archiveArtifacts 'data_test.csv'
|
||||||
archiveArtifacts 'scriptTrain.csv'
|
archiveArtifacts 'data_train.csv'
|
||||||
archiveArtifacts 'lab3.csv'
|
archiveArtifacts 'healthcare-dataset-stroke-data.csv'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
16
copyArtiJenkins/Jenkinsfile
vendored
16
copyArtiJenkins/Jenkinsfile
vendored
@ -9,12 +9,16 @@ pipeline {
|
|||||||
copyArtifacts fingerprintArtifacts: true, projectName: 's434766-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
copyArtifacts fingerprintArtifacts: true, projectName: 's434766-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('sh: Shell Script') {
|
stage('Docker image'){
|
||||||
steps {
|
agent {
|
||||||
sh 'chmod +x copyArtiJenkins/script2.sh'
|
docker {
|
||||||
sh './copyArtiJenkins/script2.sh'
|
image 'owczarczykp/ium_s434766'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
steps {
|
||||||
|
sh 'python3 ./stats.py > stats.txt'
|
||||||
|
}
|
||||||
|
}
|
||||||
stage('archiveArtifacts') {
|
stage('archiveArtifacts') {
|
||||||
steps {
|
steps {
|
||||||
archiveArtifacts 'stats.txt'
|
archiveArtifacts 'stats.txt'
|
||||||
|
50
create.py
Normal file
50
create.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import wget
|
||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
def downloadCSV():
    """Download the stroke dataset CSV into the current working directory.

    The data always ends up in ``healthcare-dataset-stroke-data.csv``, even
    when a file with that name already exists from an earlier run.
    """
    url = 'https://git.wmi.amu.edu.pl/s434766/ium_434766/raw/branch/master/healthcare-dataset-stroke-data.csv'
    target = 'healthcare-dataset-stroke-data.csv'
    # wget.download() does not overwrite an existing file: it saves under a
    # suffixed name instead and returns the name it actually used. Move the
    # fresh copy into place so later reads do not pick up a stale file.
    saved_as = wget.download(url, out=target, bar=None)
    if saved_as != target:
        os.replace(saved_as, target)
|
||||||
|
|
||||||
|
def dropNaN():
    """Load the downloaded dataset and drop every row with a missing value.

    Reads ``healthcare-dataset-stroke-data.csv`` from the current working
    directory.

    Returns:
        A DataFrame holding only the rows that have no missing values.
    """
    data = pd.read_csv('healthcare-dataset-stroke-data.csv')
    return data.dropna()
|
||||||
|
|
||||||
|
|
||||||
|
def NormalizeData(data):
    """Return a normalised copy of the dataset.

    Steps applied, in order, to each column:

    * ``age`` is cast to a 64-bit integer (fractional ages are truncated);
    * text columns are lower-cased;
    * float columns are rescaled to the range [0, 1];
    * ``ever_married`` is mapped from yes/no to 1/0;
    * spaces in ``smoking_status`` and hyphens in ``work_type`` become
      underscores.

    Args:
        data: DataFrame that contains at least an ``age`` column.

    Returns:
        A new DataFrame; the ``astype`` call copies, so the argument itself
        is left untouched.
    """
    data = data.astype({"age": np.int64})
    for col in data.columns:
        # Dtype helpers instead of `== object` / `== np.float64`, so pandas'
        # dedicated string dtypes and other float widths are handled too.
        if pd.api.types.is_string_dtype(data[col]):
            data[col] = data[col].str.lower()
        if pd.api.types.is_float_dtype(data[col]):
            # The scaler expects a 2-D array, hence the single-column reshape.
            column_2d = data[col].values.reshape(-1, 1)
            scaler = MinMaxScaler(feature_range=(0, 1))
            data[col] = scaler.fit_transform(column_2d)
        if col == 'ever_married':
            # Values were lower-cased above, so only "yes"/"no" need mapping.
            data[col] = data[col].map(dict(yes=1, no=0))
        if col == 'smoking_status':
            data[col] = data[col].str.replace(" ", "_", regex=False)
        if col == 'work_type':
            data[col] = data[col].str.replace("-", "_", regex=False)
    return data
|
||||||
|
|
||||||
|
def saveToCSV(data1, data2, data3):
    """Write the three dataset splits to CSV files in the working directory.

    Args:
        data1: DataFrame written to ``data_train.csv``.
        data2: DataFrame written to ``data_test.csv``.
        data3: DataFrame written to ``data_val.csv``.

    The row index is not written to any of the files.
    """
    data1.to_csv("data_train.csv", index=False)
    data2.to_csv("data_test.csv", index=False)
    data3.to_csv("data_val.csv", index=False)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Pipeline: fetch the raw CSV, drop incomplete rows, normalise the columns.
downloadCSV()
data = dropNaN()
data = NormalizeData(data)
# NOTE(review): normalisation runs on the full dataset before it is split, so
# the scaling ranges are computed over test/validation rows too - confirm
# this is intended.

# Two consecutive splits give 60% train / 20% test / 20% validation: the first
# holds out 20% for test, the second takes 25% of the remaining 80%.
data_train, data_test = train_test_split(data, test_size=0.2, random_state=1)
data_train, data_val = train_test_split(data_train, test_size=0.25, random_state=1)
saveToCSV(data_train, data_test, data_val)
|
||||||
|
|
||||||
|
|
15
stats.py
Normal file
15
stats.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def describeDataset(dt, dt2, dv):
    """Print size figures for the dataset splits and a summary of the raw data.

    Reads ``healthcare-dataset-stroke-data.csv`` from the current working
    directory for the whole-dataset figures.

    Args:
        dt: Training split (DataFrame).
        dt2: Test split (DataFrame).
        dv: Validation split (DataFrame).

    Note:
        The printed "size" is ``DataFrame.size``, i.e. the number of cells
        (rows times columns), not the number of rows.
    """
    data = pd.read_csv('healthcare-dataset-stroke-data.csv')
    print("Whole dataset size: ", data.size)
    print("Train dataset size: ", dt.size)
    print("Test dataset size: ", dt2.size)
    print("Validate dataset size: ", dv.size)
    # include='all' summarises the non-numeric columns as well.
    print(data.describe(include='all'))
|
||||||
|
|
||||||
|
|
||||||
|
# Load the three split files from the working directory and print their
# statistics alongside those of the raw dataset.
data_train = pd.read_csv('data_train.csv')
data_test = pd.read_csv('data_test.csv')
data_val = pd.read_csv('data_val.csv')
describeDataset(data_train, data_test, data_val)
|
Loading…
Reference in New Issue
Block a user