fix jenkinsfile-create-dataset
This commit is contained in:
parent
5370f573fa
commit
75da800223
@ -1,28 +1,58 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent none
|
||||||
parameters {
|
/* parameters {
|
||||||
string(defaultValue: '6000',
|
string(defaultValue: '6000',
|
||||||
description: 'numbers of data entries to keep in train.csv',
|
description: 'numbers of data entries to keep in train.csv',
|
||||||
name: 'CUTOFF',
|
name: 'CUTOFF',
|
||||||
trim: true)
|
trim: true)
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
stages {
|
stages {
|
||||||
stage('sh: Shell Script') {
|
stage('copy files') {
|
||||||
|
agent any
|
||||||
|
steps {
|
||||||
|
sh '''
|
||||||
|
cp ./lab1/script.sh .
|
||||||
|
cp ./lab1/python_script.py .
|
||||||
|
cp ./lab3/Dockerfile .
|
||||||
|
cp ./lab3/requirements.txt .
|
||||||
|
'''
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* stage('sh: Shell Script') {
|
||||||
steps {
|
steps {
|
||||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
|
||||||
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
|
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
|
||||||
sh 'chmod +x ./lab2/script-zadanie-2-4.sh'
|
sh ''' chmod +x ./lab2/script-zadanie-2-4.sh
|
||||||
sh './lab2/script-zadanie-2-4.sh'
|
./lab2/script-zadanie-2-4.sh
|
||||||
sh 'chmod +x ./lab2/script-zadanie-2-4-cutoff.sh'
|
chmod +x ./lab2/script-zadanie-2-4-cutoff.sh'''
|
||||||
sh "./lab2/script-zadanie-2-4-cutoff.sh ${params.CUTOFF}"
|
sh "./lab2/script-zadanie-2-4-cutoff.sh ${params.CUTOFF}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('archive artifacts') {
|
*/
|
||||||
steps {
|
stage('docker') {
|
||||||
archiveArtifacts 'train.csv'
|
agent {
|
||||||
archiveArtifacts 'test.csv'
|
dockerfile true
|
||||||
archiveArtifacts 'valid.csv'
|
}
|
||||||
|
stages {
|
||||||
|
stage('test') {
|
||||||
|
steps {
|
||||||
|
sh 'cat /etc/issue'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('actual') {
|
||||||
|
steps {
|
||||||
|
sh './script.sh'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('archive artifacts') {
|
||||||
|
steps {
|
||||||
|
archiveArtifacts 'train.csv'
|
||||||
|
archiveArtifacts 'test.csv'
|
||||||
|
archiveArtifacts 'valid.csv'
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
19
lab3/Dockerfile
Normal file
19
lab3/Dockerfile
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Ubuntu image with Python 3.8, pip and unzip, plus the dataset scripts
# copied into /app.

# Pinned release instead of the moving "latest" tag: the python3.8 package
# requested below is not shipped by every Ubuntu release, so a floating tag
# can break the build when "latest" moves on.
FROM ubuntu:20.04

# Update and install in one layer so a cached "apt-get update" layer is never
# paired with a newer install step. DEBIAN_FRONTEND=noninteractive keeps
# package configuration from waiting for input during the build.
# stdout is discarded to keep the build log short; errors still go to stderr.
RUN apt-get update >>/dev/null \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        apt-utils \
        python3.8 \
        python3-pip \
        unzip >>/dev/null \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Dependencies first: editing the scripts does not invalidate the pip layer.
COPY ./requirements.txt ./

RUN pip3 install -r requirements.txt >>/dev/null

COPY ./python_script.py ./
COPY ./script.sh ./
RUN chmod +x script.sh

# Exec form: script.sh is started directly instead of through "sh -c".
CMD ["./script.sh"]
|
37
lab3/python_script.py
Normal file
37
lab3/python_script.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from sklearn import preprocessing
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
|
||||||
|
# Load the raw data. Every column except the last one is standardised; the
# 'stabf' column is then copied over from the raw frame unchanged.
df = pd.read_csv('smart_grid_stability_augmented.csv')

features = df.iloc[:, :-1]
scaler = preprocessing.StandardScaler()
scaler.fit(features)
df_norm_array = scaler.transform(features)

df_norm = pd.DataFrame(df_norm_array, columns=features.columns)
df_norm['stabf'] = df['stabf']

# First split: 80 % train, 20 % held out, stratified on 'stabf'.
train, testAndValid = train_test_split(
    df_norm, test_size=0.2, random_state=42, stratify=df_norm['stabf'])

# Second split: the held-out part is halved into test and validation sets,
# again stratified on 'stabf'.
test, valid = train_test_split(
    testAndValid, test_size=0.5, random_state=42,
    stratify=testAndValid['stabf'])
|
||||||
|
|
||||||
|
|
||||||
|
def namestr(obj, namespace):
    """Return every name in *namespace* that is bound to *obj* itself.

    Matching is by identity (``is``), not equality, so two equal but distinct
    objects are not confused with each other.

    Args:
        obj: The object to look for.
        namespace: A mapping of names to objects, e.g. ``globals()``.

    Returns:
        The matching names, in the mapping's iteration order. The list is
        empty when no name refers to *obj*.
    """
    # items() yields the value together with its key, avoiding a second
    # lookup per name.
    return [name for name, value in namespace.items() if value is obj]
|
||||||
|
|
||||||
|
|
||||||
|
dataset = df_norm

# Print a short report for the full dataset and each split.
# The labels are spelled out rather than recovered from globals(): the printed
# names are the same, but they no longer depend on which other module-level
# names happen to reference the same object.
for label, frame in (('dataset', dataset), ('train', train),
                     ('test', test), ('valid', valid)):
    print(label)
    print("size:", len(frame))
    print(frame.describe(include='all'))
    print("class distribution", frame.value_counts('stabf'))
    print('===============================================================')
|
3
lab3/requirements.txt
Normal file
3
lab3/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
kaggle==1.5.12
pandas==1.1.2
# "sklearn" on PyPI is a deprecated placeholder; the real distribution that
# provides "import sklearn" is scikit-learn.
scikit-learn
|
6
lab3/script.sh
Normal file
6
lab3/script.sh
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
# Download the smart-grid-stability dataset from Kaggle, unpack it and run
# python_script.py.

# Tool output is discarded to keep the log short, so each step is checked
# explicitly: a failure is reported here instead of surfacing later as a
# confusing error in the next step.
if ! kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1; then
    echo "error: could not download pcbreviglieri/smart-grid-stability from Kaggle" >&2
    exit 1
fi

# -o overwrites files left over from an earlier run instead of prompting.
if ! unzip -o smart-grid-stability.zip >>/dev/null 2>&1; then
    echo "error: could not unzip smart-grid-stability.zip" >&2
    exit 1
fi

python3 python_script.py
|
Loading…
Reference in New Issue
Block a user