Add CUTOFF and add sh file
This commit is contained in:
parent
cb4e78e1c4
commit
de49bab0b1
21
Jenkinsfile
vendored
21
Jenkinsfile
vendored
@ -2,10 +2,10 @@ pipeline {
|
||||
agent any
|
||||
//Definiujemy parametry, które będzie można podać podczas wywoływania zadania
|
||||
parameters {
|
||||
string (
|
||||
defaultValue: 'Hello World!',
|
||||
description: 'Tekst, którym chcesz przywitać świat',
|
||||
name: 'INPUT_TEXT',
|
||||
string(
|
||||
defaultValue: '1000',
|
||||
description: 'Amount of values to be used from dataset',
|
||||
name: 'CUTOFF',
|
||||
trim: false
|
||||
)
|
||||
string(
|
||||
@ -21,24 +21,17 @@ pipeline {
|
||||
)
|
||||
}
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
steps {
|
||||
sh 'git clone https://git.wmi.amu.edu.pl/s487183/ium_z487183.git'
|
||||
}
|
||||
}
|
||||
stage('Prepare data') {
|
||||
steps {
|
||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
|
||||
sh 'kaggle datasets download amalab182/property-salesmelbourne-city'
|
||||
sh 'mkdir -p ium_z487183/data'
|
||||
sh 'unzip -o property-salesmelbourne-city.zip -d ium_z487183/data'
|
||||
sh 'rm property-salesmelbourne-city.zip'
|
||||
sh 'cd ium_z487183'
|
||||
sh './get-data.sh'
|
||||
sh 'python3 prepare_dataset.py'
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Archive artifacts') {
|
||||
steps {
|
||||
sh 'python3 ium_z487183/prepare-dataset.py'
|
||||
archiveArtifacts 'X_test.csv'
|
||||
archiveArtifacts 'X_val.csv'
|
||||
archiveArtifacts 'X_train.csv'
|
||||
|
5
get-data.sh
Executable file
5
get-data.sh
Executable file
@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
kaggle datasets download amalab182/property-salesmelbourne-city
|
||||
mkdir -p data
|
||||
unzip -o property-salesmelbourne-city.zip -d data
|
||||
rm property-salesmelbourne-city.zip
|
@ -1,3 +1,4 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
@ -27,6 +28,10 @@ sells["Price"] = sells["Price"] / sells["Price"].max()
|
||||
sells["Landsize"] = sells["Landsize"] / sells["Landsize"].max()
|
||||
sells["Distance"] = sells["Distance"] / sells["Distance"].max()
|
||||
|
||||
# cut off dataset to fixed number of values
|
||||
cutoff = int(os.environ['CUTOFF'])
|
||||
sells = sells.sample(cutoff)
|
||||
|
||||
# split into train/dev/test subsets
|
||||
X = sells
|
||||
Y = sells.pop('Price')
|
Loading…
Reference in New Issue
Block a user