Add CUTOFF and add sh file

This commit is contained in:
Marek Moryl 2023-04-21 09:37:13 +02:00
parent cb4e78e1c4
commit de49bab0b1
3 changed files with 17 additions and 14 deletions

21
Jenkinsfile vendored
View File

@ -2,10 +2,10 @@ pipeline {
agent any
// Define the parameters that can be supplied when the job is triggered
parameters {
string (
defaultValue: 'Hello World!',
description: 'Tekst, którym chcesz przywitać świat',
name: 'INPUT_TEXT',
string(
defaultValue: '1000',
description: 'Amount of values to be used from dataset',
name: 'CUTOFF',
trim: false
)
string(
@ -21,24 +21,17 @@ pipeline {
)
}
stages {
stage('Checkout') {
steps {
sh 'git clone https://git.wmi.amu.edu.pl/s487183/ium_z487183.git'
}
}
stage('Prepare data') {
steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'kaggle datasets download amalab182/property-salesmelbourne-city'
sh 'mkdir -p ium_z487183/data'
sh 'unzip -o property-salesmelbourne-city.zip -d ium_z487183/data'
sh 'rm property-salesmelbourne-city.zip'
// NOTE(review): each `sh` step is expected to run in its own shell, so this `cd`
// presumably does not carry over to the two steps below — confirm.
// If they are meant to run inside ium_z487183, wrapping them in
// dir('ium_z487183') { ... } looks like the intended form — TODO confirm.
sh 'cd ium_z487183'
sh './get-data.sh'
sh 'python3 prepare_dataset.py'
}
}
}
stage('Archive artifacts') {
steps {
sh 'python3 ium_z487183/prepare-dataset.py'
archiveArtifacts 'X_test.csv'
archiveArtifacts 'X_val.csv'
archiveArtifacts 'X_train.csv'

5
get-data.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/bash
# Download the Kaggle dataset archive and unpack it into ./data, relative to
# the directory the script is run from.
# Expects the `kaggle` CLI to be installed and authenticated.

# Abort on the first failing command, on unset variables and on pipeline
# errors, so a failed download or extraction is not masked by a later
# command succeeding.
set -euo pipefail

DATASET='amalab182/property-salesmelbourne-city'
ARCHIVE='property-salesmelbourne-city.zip'

kaggle datasets download "$DATASET"
mkdir -p data
# -o: overwrite files left over from a previous run without prompting
unzip -o "$ARCHIVE" -d data
rm "$ARCHIVE"

View File

@ -1,3 +1,4 @@
import os
import pandas as pd
from sklearn.model_selection import train_test_split
@ -27,6 +28,10 @@ sells["Price"] = sells["Price"] / sells["Price"].max()
sells["Landsize"] = sells["Landsize"] / sells["Landsize"].max()
sells["Distance"] = sells["Distance"] / sells["Distance"].max()
# cut off dataset to fixed number of values
# CUTOFF is read from the environment; when it is not set, the whole dataset is kept.
cutoff = int(os.environ.get('CUTOFF', len(sells)))
# DataFrame.sample raises ValueError when asked for more rows than exist
# (sampling without replacement), so clamp the request to the dataset size.
sells = sells.sample(min(cutoff, len(sells)))
# split to train/dev/test subsets
X = sells
Y = sells.pop('Price')