fix jenkinsfile-create-dataset

This commit is contained in:
jakubknczny 2021-04-11 11:04:32 +02:00
parent 5370f573fa
commit 75da800223
5 changed files with 106 additions and 11 deletions

View File

@ -1,28 +1,58 @@
pipeline {
agent any
parameters {
agent none
/* parameters {
string(defaultValue: '6000',
description: 'numbers of data entries to keep in train.csv',
name: 'CUTOFF',
trim: true)
}
*/
stages {
stage('sh: Shell Script') {
stage('copy files') {
agent any
steps {
sh '''
cp ./lab1/script.sh .
cp ./lab1/python_script.py .
cp ./lab3/Dockerfile .
cp ./lab3/requirements.txt .
'''
}
}
/* stage('sh: Shell Script') {
steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
sh 'chmod +x ./lab2/script-zadanie-2-4.sh'
sh './lab2/script-zadanie-2-4.sh'
sh 'chmod +x ./lab2/script-zadanie-2-4-cutoff.sh'
sh ''' chmod +x ./lab2/script-zadanie-2-4.sh
./lab2/script-zadanie-2-4.sh
chmod +x ./lab2/script-zadanie-2-4-cutoff.sh'''
sh "./lab2/script-zadanie-2-4-cutoff.sh ${params.CUTOFF}"
}
}
}
stage('archive artifacts') {
steps {
archiveArtifacts 'train.csv'
archiveArtifacts 'test.csv'
archiveArtifacts 'valid.csv'
*/
stage('docker') {
agent {
dockerfile true
}
stages {
stage('test') {
steps {
sh 'cat /etc/issue'
}
}
stage('actual') {
steps {
sh './script.sh'
}
}
stage('archive artifacts') {
steps {
archiveArtifacts 'train.csv'
archiveArtifacts 'test.csv'
archiveArtifacts 'valid.csv'
}
}
}
}
}

19
lab3/Dockerfile Normal file
View File

@ -0,0 +1,19 @@
FROM ubuntu:latest
# Never block the build on an apt prompt.
ENV DEBIAN_FRONTEND=noninteractive
# apt-get (not apt) is the stable CLI for scripts; one layer for
# update+install+cleanup keeps the index fresh and the image small.
RUN apt-get update >>/dev/null && \
    apt-get install -y --no-install-recommends \
        apt-utils \
        python3.8 \
        python3-pip \
        unzip >>/dev/null && \
    rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy requirements first so editing the scripts does not invalidate
# the (slow) pip-install layer.
COPY ./requirements.txt ./
RUN pip3 install -r requirements.txt >>/dev/null
COPY ./python_script.py ./script.sh ./
RUN chmod +x script.sh
CMD ./script.sh

37
lab3/python_script.py Normal file
View File

@ -0,0 +1,37 @@
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# Load the raw dataset; the final column 'stabf' is the class label,
# everything before it is treated as a numeric feature.
df = pd.read_csv('smart_grid_stability_augmented.csv')

# Standardise the feature columns (zero mean, unit variance).
# NOTE(review): the scaler is fitted on the FULL dataset before the
# train/test split, so test statistics leak into the scaling —
# confirm this is intended before using the splits for evaluation.
feature_scaler = preprocessing.StandardScaler().fit(df.iloc[:, 0:-1])
scaled_features = feature_scaler.transform(df.iloc[:, 0:-1])
df_norm = pd.DataFrame(data=scaled_features, columns=df.columns[:-1])
df_norm['stabf'] = df['stabf']

# Stratified 80/10/10 split: carve off 20%, then halve that holdout
# into test and validation. Fixed random_state keeps it reproducible.
train, holdout = train_test_split(
    df_norm,
    test_size=0.2,
    random_state=42,
    stratify=df_norm['stabf'])
test, valid = train_test_split(
    holdout,
    test_size=0.5,
    random_state=42,
    stratify=holdout['stabf'])
def namestr(obj, namespace):
    """Return every key in ``namespace`` whose value IS ``obj`` (identity)."""
    return [name for name, value in namespace.items() if value is obj]
# Alias so the full frame is reported under the name 'dataset' below.
dataset = df_norm
for x in [dataset, train, test, valid]:
    # Recover the frame's variable name by identity-scanning globals().
    # Several names can alias one frame (dataset/df_norm, and the loop
    # variable 'x' itself), so keep only the longest names and print the
    # last of them. The loop variable must stay the one-letter 'x' so it
    # never wins this longest-name filter.
    matches = namestr(x, globals())
    longest = max(len(name) for name in matches)
    print([name for name in matches if len(name) == longest][-1])
    print("size:", len(x))
    print(x.describe(include='all'))
    print("class distribution", x.value_counts('stabf'))
    print('===============================================================')

3
lab3/requirements.txt Normal file
View File

@ -0,0 +1,3 @@
kaggle==1.5.12
pandas==1.1.2
scikit-learn==0.23.2

6
lab3/script.sh Normal file
View File

@ -0,0 +1,6 @@
#!/bin/bash
# Download the Kaggle dataset, unpack it and run the analysis script.
# Requires KAGGLE_USERNAME / KAGGLE_KEY in the environment.
kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1
# -o: overwrite without asking — without it a re-run hangs on unzip's
# interactive overwrite prompt (invisible behind the 2>&1 redirect).
unzip -o smart-grid-stability.zip >>/dev/null 2>&1
python3 python_script.py