Add CUTOFF and add sh file

2023-04-21 09:37:13 +02:00 · 2023-04-21 09:37:13 +02:00 · de49bab0b1
commit de49bab0b1
parent cb4e78e1c4
3 changed files with 17 additions and 14 deletions
--- a/21
+++ b/21
@@ -2,10 +2,10 @@ pipeline {
   agent any
   //Definijuemy parametry, które będzie można podać podczas wywoływania zadania
   parameters {
-      string (
+      string(
-         defaultValue: 'Hello World!',
+         defaultValue: '1000',
-         description: 'Tekst, którym chcesz przywitać świat',
+         description: 'Amount of values to be used from dataset',
-         name: 'INPUT_TEXT',
+         name: 'CUTOFF',
         trim: false
      )
      string(
@@ -21,24 +21,17 @@ pipeline {
      )
   }
   stages {
      stage('Checkout') {
         steps {
            sh 'git clone https://git.wmi.amu.edu.pl/s487183/ium_z487183.git' 
         }
      }
      stage('Prepare data') {
         steps {
            withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-               sh 'kaggle datasets download amalab182/property-salesmelbourne-city'
+               sh 'cd ium_z487183'
-               sh 'mkdir -p ium_z487183/data'
+               sh './get-data.sh'
-               sh 'unzip -o property-salesmelbourne-city.zip -d ium_z487183/data'
+               sh 'python3 prepare_dataset.py'
               sh 'rm property-salesmelbourne-city.zip'
            }
         }
      }
      stage('Archive artifacts') {
         steps {
            sh 'python3 ium_z487183/prepare-dataset.py'
            archiveArtifacts 'X_test.csv'
            archiveArtifacts 'X_val.csv'
            archiveArtifacts 'X_train.csv'
--- a/get-data.sh
+++ b/get-data.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+kaggle datasets download amalab182/property-salesmelbourne-city
+mkdir -p data
+unzip -o property-salesmelbourne-city.zip -d data
+rm property-salesmelbourne-city.zip
--- a/prepare_dataset.py
+++ b/prepare_dataset.py
@@ -1,3 +1,4 @@
 import os
 import pandas as pd
 from sklearn.model_selection import train_test_split
@@ -27,6 +28,10 @@ sells["Price"] = sells["Price"] / sells["Price"].max()
 sells["Landsize"] = sells["Landsize"] / sells["Landsize"].max()
 sells["Distance"] = sells["Distance"] / sells["Distance"].max()
 # cut off dataset to fixed number of values
 cutoff = int(os.environ['CUTOFF'])
 sells = sells.sample(cutoff)
 # split to train/dev/test subsets
 X = sells
 Y = sells.pop('Price')