diff --git a/lab2/Jenkinsfile_create_dataset b/lab2/Jenkinsfile_create_dataset
index ac13f07..bf3faa3 100644
--- a/lab2/Jenkinsfile_create_dataset
+++ b/lab2/Jenkinsfile_create_dataset
@@ -1,28 +1,58 @@
 pipeline {
-    agent any
-    parameters {
+    agent none
+/*    parameters {
         string(defaultValue: '6000', description: 'numbers of data entries to keep in train.csv', name: 'CUTOFF', trim: true)
     }
+*/
     stages {
-        stage('sh: Shell Script') {
+        stage('copy files') {
+            agent any
+            steps {
+                sh '''
+                cp ./lab1/script.sh .
+                cp ./lab1/python_script.py .
+                cp ./lab3/Dockerfile .
+                cp ./lab3/requirements.txt .
+                '''
+            }
+        }
+/*        stage('sh: Shell Script') {
             steps {
                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
                 "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
-                    sh 'chmod +x ./lab2/script-zadanie-2-4.sh'
-                    sh './lab2/script-zadanie-2-4.sh'
-                    sh 'chmod +x ./lab2/script-zadanie-2-4-cutoff.sh'
+                    sh ''' chmod +x ./lab2/script-zadanie-2-4.sh
+                    ./lab2/script-zadanie-2-4.sh
+                    chmod +x ./lab2/script-zadanie-2-4-cutoff.sh'''
                     sh "./lab2/script-zadanie-2-4-cutoff.sh ${params.CUTOFF}"
                 }
             }
         }
-        stage('archive artifacts') {
-            steps {
-                archiveArtifacts 'train.csv'
-                archiveArtifacts 'test.csv'
-                archiveArtifacts 'valid.csv'
+*/
+        stage('docker') {
+            agent {
+                dockerfile true
+            }
+            stages {
+                stage('test') {
+                    steps {
+                        sh 'cat /etc/issue'
+                    }
+                }
+                stage('actual') {
+                    steps {
+                        sh './script.sh'
+                    }
+                }
+                stage('archive artifacts') {
+                    steps {
+                        archiveArtifacts 'train.csv'
+                        archiveArtifacts 'test.csv'
+                        archiveArtifacts 'valid.csv'
+                    }
+                }
             }
         }
     }
 }
diff --git a/lab3/Dockerfile b/lab3/Dockerfile
new file mode 100644
index 0000000..da7d233
--- /dev/null
+++ b/lab3/Dockerfile
@@ -0,0 +1,19 @@
+FROM ubuntu:latest
+
+RUN apt update >>/dev/null
+RUN apt install -y apt-utils >>/dev/null
+RUN apt install -y python3.8 >>/dev/null
+RUN apt install -y python3-pip >>/dev/null
+RUN apt install -y unzip >>/dev/null
+
+WORKDIR /app
+
+COPY ./python_script.py ./
+COPY ./script.sh ./
+RUN chmod +x script.sh
+
+COPY ./requirements.txt ./
+
+RUN pip3 install -r requirements.txt >>/dev/null
+
+CMD ./script.sh
diff --git a/lab3/python_script.py b/lab3/python_script.py
new file mode 100644
index 0000000..832f54d
--- /dev/null
+++ b/lab3/python_script.py
@@ -0,0 +1,37 @@
+import pandas as pd
+
+from sklearn import preprocessing
+from sklearn.model_selection import train_test_split
+
+
+df = pd.read_csv('smart_grid_stability_augmented.csv')
+scaler = preprocessing.StandardScaler().fit(df.iloc[:, 0:-1])
+df_norm_array = scaler.transform(df.iloc[:, 0:-1])
+df_norm = pd.DataFrame(data=df_norm_array,
+                       columns=df.columns[:-1])
+df_norm['stabf'] = df['stabf']
+
+train, testAndValid = train_test_split(
+    df_norm,
+    test_size=0.2,
+    random_state=42,
+    stratify=df_norm['stabf'])
+
+test, valid = train_test_split(
+    testAndValid,
+    test_size=0.5,
+    random_state=42,
+    stratify=testAndValid['stabf'])
+
+
+def namestr(obj, namespace):
+    return [name for name in namespace if namespace[name] is obj]
+
+
+dataset = df_norm
+for x in [dataset, train, test, valid]:
+    print([q for q in namestr(x, globals()) if len(q) == max([len(w) for w in namestr(x, globals())])][-1])
+    print("size:", len(x))
+    print(x.describe(include='all'))
+    print("class distribution", x.value_counts('stabf'))
+    print('===============================================================')
diff --git a/lab3/requirements.txt b/lab3/requirements.txt
new file mode 100644
index 0000000..2d81d02
--- /dev/null
+++ b/lab3/requirements.txt
@@ -0,0 +1,3 @@
+kaggle==1.5.12
+pandas==1.1.2
+sklearn==0.0
diff --git a/lab3/script.sh b/lab3/script.sh
new file mode 100644
index 0000000..91cf91a
--- /dev/null
+++ b/lab3/script.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1
+unzip smart-grid-stability.zip >>/dev/null 2>&1
+
+python3 python_script.py
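
Local verification: a minimal sketch of what the new 'docker' stage does, assuming Docker is installed and KAGGLE_USERNAME/KAGGLE_KEY are exported in the host shell; the image tag create-dataset is illustrative, not part of the patch.

    # Mirror the pipeline's 'copy files' stage: lab3/Dockerfile expects
    # these files at the root of the build context.
    cp lab1/script.sh lab1/python_script.py lab3/requirements.txt .
    cp lab3/Dockerfile .

    # Build the image, then run it; the default CMD executes script.sh,
    # which downloads the Kaggle dataset and runs python_script.py.
    # -e VAR with no value forwards the variable from the host environment.
    docker build -t create-dataset .
    docker run -e KAGGLE_USERNAME -e KAGGLE_KEY create-dataset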