Add CUTOFF and add sh file

This commit is contained in:
Marek Moryl 2023-04-21 09:37:13 +02:00
parent cb4e78e1c4
commit de49bab0b1
3 changed files with 17 additions and 14 deletions

21
Jenkinsfile vendored
View File

@ -2,10 +2,10 @@ pipeline {
agent any agent any
//Definiujemy parametry, które będzie można podać podczas wywoływania zadania //Definiujemy parametry, które będzie można podać podczas wywoływania zadania
parameters { parameters {
string ( string(
defaultValue: 'Hello World!', defaultValue: '1000',
description: 'Tekst, którym chcesz przywitać świat', description: 'Amount of values to be used from dataset',
name: 'INPUT_TEXT', name: 'CUTOFF',
trim: false trim: false
) )
string( string(
@ -21,24 +21,17 @@ pipeline {
) )
} }
stages { stages {
stage('Checkout') {
steps {
sh 'git clone https://git.wmi.amu.edu.pl/s487183/ium_z487183.git'
}
}
stage('Prepare data') { stage('Prepare data') {
steps { steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'kaggle datasets download amalab182/property-salesmelbourne-city' sh 'cd ium_z487183'
sh 'mkdir -p ium_z487183/data' sh './get-data.sh'
sh 'unzip -o property-salesmelbourne-city.zip -d ium_z487183/data' sh 'python3 prepare_dataset.py'
sh 'rm property-salesmelbourne-city.zip'
} }
} }
} }
stage('Archive artifacts') { stage('Archive artifacts') {
steps { steps {
sh 'python3 ium_z487183/prepare-dataset.py'
archiveArtifacts 'X_test.csv' archiveArtifacts 'X_test.csv'
archiveArtifacts 'X_val.csv' archiveArtifacts 'X_val.csv'
archiveArtifacts 'X_train.csv' archiveArtifacts 'X_train.csv'

5
get-data.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/bash
# Download the Kaggle dataset archive and unpack it into ./data
# (relative to the current working directory), then delete the archive.
#
# Uses the `kaggle` CLI, so Kaggle credentials must already be available
# to it (e.g. via KAGGLE_USERNAME / KAGGLE_KEY in the environment).

# Abort on the first failing command, on use of an unset variable, and on
# failures inside pipelines, so a failed download is reported to the caller
# instead of being masked by the later commands.
set -euo pipefail

dataset='amalab182/property-salesmelbourne-city'
archive='property-salesmelbourne-city.zip'

kaggle datasets download "$dataset"
mkdir -p data
# -o: overwrite files left over from a previous run without prompting.
unzip -o "$archive" -d data
rm "$archive"

View File

@ -1,3 +1,4 @@
import os
import pandas as pd import pandas as pd
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
@ -27,6 +28,10 @@ sells["Price"] = sells["Price"] / sells["Price"].max()
sells["Landsize"] = sells["Landsize"] / sells["Landsize"].max() sells["Landsize"] = sells["Landsize"] / sells["Landsize"].max()
sells["Distance"] = sells["Distance"] / sells["Distance"].max() sells["Distance"] = sells["Distance"] / sells["Distance"].max()
# Cut the dataset down to at most CUTOFF randomly sampled rows.
# CUTOFF is read from the environment and must be an integer string;
# a missing variable raises KeyError, a non-integer value raises ValueError.
cutoff = int(os.environ['CUTOFF'])
# DataFrame.sample() raises ValueError when asked for more rows than the
# frame holds (sampling without replacement), so clamp to the row count.
sells = sells.sample(min(cutoff, len(sells)))
# split to train/dev/test subsets # split to train/dev/test subsets
X = sells X = sells
Y = sells.pop('Price') Y = sells.pop('Price')