update Jenkinsfile

add create-dataset python file
2023-04-20 21:20:12 +02:00 · 2023-04-20 21:03:45 +02:00
2 changed files with 50 additions and 7 deletions
--- a/40
+++ b/40
@@ -1,10 +1,36 @@
 pipeline {
-    agent any
-    stages {
-        stage('Stage 1') {
-            steps {
-                echo 'Hello world!'
-            }
-        }
+    agent  any
+    parameters{  // build parameters; the Build stage reads them as params.<NAME>
+     string(
+         defaultValue: 'piotrwrzodak',
+         description: 'Kaggle username',
+         name: 'KAGGLE_USERNAME',  // passed to the Build stage environment through withEnv
+         trim: false
+     )
+     password(  // declared as a password parameter rather than a string; it carries the API token
+         defaultValue: '',
+         description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
+         name: 'KAGGLE_KEY'  // passed to the Build stage environment through withEnv
+     )
+     string(
+         defaultValue: '1000',
+         description: 'CUTOFF',
+         name: 'CUTOFF',  // NOTE(review): not referenced by any stage visible in this diff - confirm where it is consumed
+         trim: false
+     )
    }
+    stages {
+      // Clone the project repository and unpack the Kaggle Airbnb dataset into the checkout.
+      stage('Build') {
+         steps {
+            // When the workspace is reused, the old checkout is still there and git clone refuses a non-empty target.
+            sh 'rm -rf ium_z444510 && git clone https://git.wmi.amu.edu.pl/s444510/ium_z444510.git'
+            withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {  // credentials for the kaggle CLI
+               sh 'kaggle datasets download -d thedevastator/airbnb-prices-in-european-cities'
+               sh 'unzip -o airbnb-prices-in-european-cities.zip -d ./ium_z444510 && rm airbnb-prices-in-european-cities.zip'  // -o: never prompt
+               sh 'ls -a && ls -a ./ium_z444510'
+            }
+         }
+      }
+   }
 }
--- a/create-dataset.py
+++ b/create-dataset.py
@@ -0,0 +1,17 @@
+import pandas as pd
+import os
+import numpy as np
+
+
+cutoff = int(os.environ['CUTOFF'])  # maximum number of rows to keep, read from the CUTOFF env var
+data = pd.read_csv('./ium_z444510/barcelona_weekends.csv')
+# Clamp to the row count (sample(n) raises ValueError beyond it); seed it so reruns pick the same rows.
+data = data.sample(min(cutoff, len(data)), random_state=42)
+data = data.iloc[:, 1:]  # drop the first column
+# Shuffle, then cut 60/20/20 by position: same boundaries as np.split, minus its deprecated DataFrame path.
+rows = data.sample(frac=1, random_state=42)
+train_end, dev_end = int(.6 * len(rows)), int(.8 * len(rows))
+train_set, dev_set, test_set = rows.iloc[:train_end], rows.iloc[train_end:dev_end], rows.iloc[dev_end:]
+train_set.to_csv('train.csv', index=False)
+dev_set.to_csv('dev.csv', index=False)
+test_set.to_csv('test.csv', index=False)
Author	SHA1	Message	Date
piotrwrzodak	31ffbd656c	update Jenkinsfile	2023-04-20 21:20:12 +02:00
piotrwrzodak	1afa0cf50e	add create-dataset python file	2023-04-20 21:03:45 +02:00