From c1d9cdb0d1c80af47136df0c95aec552d6a964d2 Mon Sep 17 00:00:00 2001
From: Adrian Charkiewicz <riraasaa@gmail.com>
Date: Sun, 3 Apr 2022 22:17:18 +0200
Subject: [PATCH] dockerfile jenkinsfile

---
 Dockerfile         | 23 +++++++++++++++++++++++
 Jenkinsfile        | 10 +++++++++-
 Jenkinsfile2       |  4 +++-
 data_processing.py | 24 ++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 data_processing.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..0c9ab4e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,24 @@
+# Pinned release keeps builds reproducible; newer Ubuntu images mark the
+# system Python as externally managed (PEP 668) and reject plain pip3 installs.
+FROM ubuntu:22.04
+# Install required dependencies
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y figlet python3-pip unzip \
+    && rm -rf /var/lib/apt/lists/*
+# ENV (unlike "RUN export") persists into later layers and the container.
+ENV PATH="$PATH:/usr/local/bin/python"
+RUN pip3 install kaggle pandas scikit-learn matplotlib
+# SECURITY(review): the Kaggle API key below is baked into the image and the
+# repository history; rotate it and inject credentials at build/run time.
+RUN mkdir -p ~/.kaggle/ \
+    && echo '{"username":"riraasaa","key":"1b1376b538ecd7da9e79b94d218ae3ec"}' > ~/.kaggle/kaggle.json \
+    && chmod 600 ~/.kaggle/kaggle.json
+# Create app directory in image
+WORKDIR /app
+# Copy init dataset script to /app directory in image
+COPY ./data_processing.py ./
+# Download kaggle dataset
+RUN kaggle datasets download -d uciml/red-wine-quality-cortez-et-al-2009
+RUN unzip -o red-wine-quality-cortez-et-al-2009.zip
+# Script executed after docker run
+CMD ["python3", "./data_processing.py"]
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index f13b333..07c5129 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,5 +1,11 @@
 pipeline {
-    agent any
+    // A declarative pipeline accepts a single top-level agent: build the image
+    // from the repository Dockerfile and tag it "ium" for reuse by name.
+    agent {
+        dockerfile {
+            additionalBuildArgs "-t ium"
+        }
+    }
     parameters{
         password(
             defaultValue: '', 
diff --git a/Jenkinsfile2 b/Jenkinsfile2
index 047a791..4243fac 100644
--- a/Jenkinsfile2
+++ b/Jenkinsfile2
@@ -1,5 +1,10 @@
 pipeline {
-    agent any
+    // Run the pipeline inside the Docker image named "ium".
+    agent {
+        docker {
+            image 'ium'
+        }
+    }
     parameters{
         buildSelector(
             defaultSelector: lastSuccessful(),
diff --git a/data_processing.py b/data_processing.py
new file mode 100644
index 0000000..c859a96
--- /dev/null
+++ b/data_processing.py
@@ -0,0 +1,47 @@
+"""Split the red wine quality data and min-max scale the features.
+
+Reads ``winequality-red.csv`` from the working directory, makes an 80/10/10
+train/validation/test split, prints the split sizes and summary statistics,
+and scales every subset with a scaler fitted on the training features only.
+"""
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler
+
+# One seed for both splits keeps the train/validation/test membership
+# reproducible across runs.
+RANDOM_STATE = 1
+
+wine = pd.read_csv('winequality-red.csv')
+
+# Hold out 20% of the rows, stratified on quality, then halve the hold-out
+# into validation and test sets.
+X_train, X_rem, y_train, y_rem = train_test_split(
+    wine.iloc[:, :-1],
+    wine.iloc[:, -1],
+    test_size=0.2,
+    random_state=RANDOM_STATE,
+    stratify=wine["quality"],
+)
+X_valid, X_test, y_valid, y_test = train_test_split(
+    X_rem, y_rem, test_size=0.5, random_state=RANDOM_STATE
+)
+
+# Print the shapes in the order the label announces: train, test, valid.
+print("Wielkosc danych: train,test,valid:")
+print(X_train.shape)
+print(X_test.shape)
+print(X_valid.shape)
+print("wine describe:")
+print(wine.describe())
+
+# Fit the scaler on the training features only so that no statistics leak
+# from the validation/test sets into the transform.
+norm = MinMaxScaler()
+norm_fit = norm.fit(X_train)
+norm_X_train = norm_fit.transform(X_train)
+norm_X_test = norm_fit.transform(X_test)
+norm_X_valid = norm_fit.transform(X_valid)
+# NOTE(review): the scaled arrays are not written anywhere in this script;
+# confirm whether they should be persisted for later pipeline stages.