From c1d9cdb0d1c80af47136df0c95aec552d6a964d2 Mon Sep 17 00:00:00 2001
From: Adrian Charkiewicz
Date: Sun, 3 Apr 2022 22:17:18 +0200
Subject: [PATCH] dockerfile jenkinsfile

---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit):

- Dockerfile: the Kaggle API key that was written into ~/.kaggle/kaggle.json
  is gone. It has already been published with the earlier revision of this
  patch, so it must be revoked on kaggle.com. Credentials now arrive as the
  build args KAGGLE_USERNAME / KAGGLE_KEY; ARG values are visible to RUN as
  environment variables, which is where the kaggle CLI looks for them.
  Build args still show up in the build log and in `docker history`, so do
  not publish the image; a BuildKit secret mount is the stronger option.
- Dockerfile: base image pinned to ubuntu:20.04, the release `latest` pointed
  at when this commit was authored. Current `latest` refuses system-wide
  `pip3 install` (PEP 668).
- Dockerfile: dropped `RUN export PATH=...`. A variable exported inside RUN
  does not outlive that instruction's shell, and the value was wrapped in
  typographic quotes, so the line had no effect.
- Jenkinsfile: declarative Pipeline accepts a single top-level `agent`
  section; the previous revision added a second one. Only the `dockerfile`
  agent is kept. It builds the image and tags it `ium`, which Jenkinsfile2
  runs in. NOTE(review): assumes the job defines a KAGGLE_USERNAME parameter
  next to KAGGLE_KEY - confirm.
- data_processing.py: the second split is seeded as well, and the shapes are
  printed in the order the label announces (train, test, valid).
- `index` lines are omitted for the files whose content changed. Leading
  whitespace of the Jenkinsfile context lines is reconstructed as 4 spaces
  per level; use `git apply --ignore-whitespace` if the tree differs.

 Dockerfile         | 23 +++++++++++++++++++++++
 Jenkinsfile        |  6 +++++-
 Jenkinsfile2       |  4 +++-
 data_processing.py | 24 ++++++++++++++++++++++++
 4 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 data_processing.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,23 @@
+FROM ubuntu:20.04
+# Install required dependencies
+RUN apt update
+RUN apt-get update
+RUN apt install -y figlet
+RUN apt install python3-pip -y
+RUN apt install unzip -y
+RUN pip3 install kaggle
+RUN pip3 install pandas
+RUN pip3 install scikit-learn
+RUN pip3 install matplotlib
+# Kaggle credentials come from --build-arg (kaggle reads them from the env); never commit them
+ARG KAGGLE_USERNAME
+ARG KAGGLE_KEY
+# Create app directory in image
+WORKDIR /app
+# Copy init dataset script to /app directory in image
+COPY ./data_processing.py ./
+# Download kaggle dataset
+RUN kaggle datasets download -d uciml/red-wine-quality-cortez-et-al-2009
+RUN unzip -o red-wine-quality-cortez-et-al-2009.zip
+# Script executed after docker run
+CMD python3 ./data_processing.py
diff --git a/Jenkinsfile b/Jenkinsfile
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,5 +1,9 @@
 pipeline {
-    agent any
+    agent {
+        dockerfile {
+            additionalBuildArgs "-t ium --build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY}"
+        }
+    }
     parameters{
         password(
             defaultValue: '',
diff --git a/Jenkinsfile2 b/Jenkinsfile2
index 047a791..4243fac 100644
--- a/Jenkinsfile2
+++ b/Jenkinsfile2
@@ -1,5 +1,7 @@
 pipeline {
-    agent any
+    agent {
+        docker { image 'ium' }
+    }
     parameters{
         buildSelector(
             defaultSelector: lastSuccessful(),
diff --git a/data_processing.py b/data_processing.py
new file mode 100644
--- /dev/null
+++ b/data_processing.py
@@ -0,0 +1,24 @@
+"""Split winequality-red.csv into train/valid/test sets and min-max scale the feature columns."""
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler
+wine = pd.read_csv('winequality-red.csv')
+
+
+X_train,X_rem,y_train,y_rem = train_test_split(wine.iloc[:,:-1],wine.iloc[:,-1], test_size=0.2, random_state=1,stratify=wine["quality"])
+# Seed this split as well so the valid/test halves are the same on every run
+X_valid, X_test, y_valid, y_test = train_test_split(X_rem,y_rem, test_size=0.5, random_state=1)
+
+print("Wielkosc danych: train,test,valid:")
+print(X_train.shape)
+print(X_test.shape)
+print(X_valid.shape)
+print("wine describe:")
+print(wine.describe())
+
+norm = MinMaxScaler()
+norm_fit = norm.fit(X_train)
+norm_X_train = norm_fit.transform(X_train)
+norm_X_test = norm_fit.transform(X_test)
+norm_X_valid = norm_fit.transform(X_valid)