Add CUTOFF and add sh file
This commit is contained in:
parent
cb4e78e1c4
commit
de49bab0b1
21
Jenkinsfile
vendored
21
Jenkinsfile
vendored
@ -2,10 +2,10 @@ pipeline {
|
|||||||
agent any
|
agent any
|
||||||
//Definijuemy parametry, które będzie można podać podczas wywoływania zadania
|
//Definijuemy parametry, które będzie można podać podczas wywoływania zadania
|
||||||
parameters {
|
parameters {
|
||||||
string (
|
string(
|
||||||
defaultValue: 'Hello World!',
|
defaultValue: '1000',
|
||||||
description: 'Tekst, którym chcesz przywitać świat',
|
description: 'Amount of values to be used from dataset',
|
||||||
name: 'INPUT_TEXT',
|
name: 'CUTOFF',
|
||||||
trim: false
|
trim: false
|
||||||
)
|
)
|
||||||
string(
|
string(
|
||||||
@ -21,24 +21,17 @@ pipeline {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
stages {
|
stages {
|
||||||
stage('Checkout') {
|
|
||||||
steps {
|
|
||||||
sh 'git clone https://git.wmi.amu.edu.pl/s487183/ium_z487183.git'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stage('Prepare data') {
|
stage('Prepare data') {
|
||||||
steps {
|
steps {
|
||||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
|
||||||
sh 'kaggle datasets download amalab182/property-salesmelbourne-city'
|
sh 'cd ium_z487183'
|
||||||
sh 'mkdir -p ium_z487183/data'
|
sh './get-data.sh'
|
||||||
sh 'unzip -o property-salesmelbourne-city.zip -d ium_z487183/data'
|
sh 'python3 prepare_dataset.py'
|
||||||
sh 'rm property-salesmelbourne-city.zip'
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('Archive artifacts') {
|
stage('Archive artifacts') {
|
||||||
steps {
|
steps {
|
||||||
sh 'python3 ium_z487183/prepare-dataset.py'
|
|
||||||
archiveArtifacts 'X_test.csv'
|
archiveArtifacts 'X_test.csv'
|
||||||
archiveArtifacts 'X_val.csv'
|
archiveArtifacts 'X_val.csv'
|
||||||
archiveArtifacts 'X_train.csv'
|
archiveArtifacts 'X_train.csv'
|
||||||
|
5
get-data.sh
Executable file
5
get-data.sh
Executable file
@ -0,0 +1,5 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
kaggle datasets download amalab182/property-salesmelbourne-city
|
||||||
|
mkdir -p data
|
||||||
|
unzip -o property-salesmelbourne-city.zip -d data
|
||||||
|
rm property-salesmelbourne-city.zip
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
@ -27,6 +28,10 @@ sells["Price"] = sells["Price"] / sells["Price"].max()
|
|||||||
sells["Landsize"] = sells["Landsize"] / sells["Landsize"].max()
|
sells["Landsize"] = sells["Landsize"] / sells["Landsize"].max()
|
||||||
sells["Distance"] = sells["Distance"] / sells["Distance"].max()
|
sells["Distance"] = sells["Distance"] / sells["Distance"].max()
|
||||||
|
|
||||||
|
# cut off dataset to fixed number of values
|
||||||
|
cutoff = int(os.environ['CUTOFF'])
|
||||||
|
sells = sells.sample(cutoff)
|
||||||
|
|
||||||
# split to train/dev/test subsets
|
# split to train/dev/test subsets
|
||||||
X = sells
|
X = sells
|
||||||
Y = sells.pop('Price')
|
Y = sells.pop('Price')
|
Loading…
Reference in New Issue
Block a user