update Jenkinsfile

add create-dataset python file
2023-04-20 21:20:12 +02:00 · 2023-04-20 21:03:45 +02:00
2 changed files with 50 additions and 7 deletions
--- a/30
+++ b/30
@ -1,9 +1,35 @@
 pipeline {
    agent  any
    parameters{
     string(
         defaultValue: 'piotrwrzodak',
         description: 'Kaggle username',
         name: 'KAGGLE_USERNAME',
         trim: false
     )
     password(
         defaultValue: '',
         description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
         name: 'KAGGLE_KEY'
     )
     string(
         defaultValue: '1000',
         description: 'CUTOFF',
         name: 'CUTOFF',
         trim: false
     )
    }
    stages {
-        stage('Stage 1') {
+      stage('Build') {
         steps {
-                echo 'Hello world!'
+            sh 'git clone https://git.wmi.amu.edu.pl/s444510/ium_z444510.git'
            withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
               sh 'kaggle datasets download -d thedevastator/airbnb-prices-in-european-cities'
               sh 'unzip airbnb-prices-in-european-cities.zip -d ./ium_z444510'
               sh 'rm airbnb-prices-in-european-cities.zip'
               sh 'ls -a'
               sh 'ls -a ./ium_z444510'
            }
         }
      }
   }
--- a/create-dataset.py
+++ b/create-dataset.py
@ -0,0 +1,17 @@
 import pandas as pd
 import os
 import numpy as np
 # Build train/dev/test CSV files from a random subset of the Barcelona
 # weekend listings.
 #
 # The CUTOFF environment variable gives the maximum number of rows to keep.
 # A missing variable raises KeyError; a non-integer value raises ValueError.
 cutoff = int(os.environ['CUTOFF'])
 data = pd.read_csv('./ium_z444510/barcelona_weekends.csv')
 # Clamp the sample size: DataFrame.sample raises ValueError when asked for
 # more rows than the frame holds (sampling is without replacement).
 # The draw is seeded so the subset, like the shuffle below, is reproducible.
 data = data.sample(n=min(cutoff, len(data)), random_state=42)
 # Drop the first column (presumably the row index saved in the CSV --
 # TODO confirm against the dataset).
 data = data.iloc[:, 1:]
 shuffled = data.sample(frac=1, random_state=42)
 # 60% train / 20% dev / 20% test boundaries, truncated to whole rows.
 train_end = int(.6 * len(shuffled))
 dev_end = int(.8 * len(shuffled))
 # Positional slices give the same partition as np.split(frame, [a, b])
 # without routing a DataFrame through NumPy, which relies on
 # DataFrame.swapaxes (deprecated in recent pandas releases).
 train_set = shuffled.iloc[:train_end]
 dev_set = shuffled.iloc[train_end:dev_end]
 test_set = shuffled.iloc[dev_end:]
 train_set.to_csv('train.csv', index=False)
 dev_set.to_csv('dev.csv', index=False)
 test_set.to_csv('test.csv', index=False)
Author	SHA1	Message	Date
piotrwrzodak	31ffbd656c	update Jenkinsfile	2023-04-20 21:20:12 +02:00
piotrwrzodak	1afa0cf50e	add create-dataset python file	2023-04-20 21:03:45 +02:00