From e75c417504a20ec241f666f7a3c479c8f7cdc5d0 Mon Sep 17 00:00:00 2001 From: piotrwrzodak Date: Thu, 11 May 2023 22:58:08 +0200 Subject: [PATCH] 4.3.1 v8 --- Dockerfile | 7 +++++++ Jenkinsfile-create-dataset-docker | 7 +------ create-dataset.py | 8 ++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 804a083..4c47d7e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,3 +4,10 @@ RUN apt-get update && \ apt-get install -y python3 python3-pip RUN pip3 install pandas numpy + +WORKDIR /app + +COPY create-dataset.py /app +COPY data/barcelona_weekends.csv /app + +CMD ["python3", "create-dataset.py"] \ No newline at end of file diff --git a/Jenkinsfile-create-dataset-docker b/Jenkinsfile-create-dataset-docker index cc07fd6..5a3ec4c 100644 --- a/Jenkinsfile-create-dataset-docker +++ b/Jenkinsfile-create-dataset-docker @@ -44,12 +44,7 @@ pipeline { dockerfile true } steps { - sh 'python3 create-dataset.py' - } - } - stage('Archive') { - steps { - archiveArtifacts artifacts: 'data/barcelona_weekends.train.csv, data/barcelona_weekends.dev.csv, data/barcelona_weekends.test.csv', fingerprint: true + archiveArtifacts artifacts: 'barcelona_weekends.train.csv, barcelona_weekends.dev.csv, barcelona_weekends.test.csv', fingerprint: true } } } diff --git a/create-dataset.py b/create-dataset.py index 281d1be..d93da1e 100644 --- a/create-dataset.py +++ b/create-dataset.py @@ -5,16 +5,16 @@ import numpy as np cutoff = 10 -data = pd.read_csv('./data/barcelona_weekends.csv') +data = pd.read_csv('./barcelona_weekends.csv') data = data.sample(cutoff) data = data.iloc[:, 1:] train_set, dev_set, test_set = np.split(data.sample(frac=1, random_state=42), [int(.6 * len(data)), int(.8 * len(data))]) -train_set.to_csv('data/barcelona_weekends.train.csv', index=False) -dev_set.to_csv('data/barcelona_weekends.dev.csv', index=False) -test_set.to_csv('data/barcelona_weekends.test.csv', index=False) +train_set.to_csv('barcelona_weekends.train.csv', index=False) +dev_set.to_csv('barcelona_weekends.dev.csv', index=False) +test_set.to_csv('barcelona_weekends.test.csv', index=False) check = pd.read_csv('./train.csv')