From 4aae76c38bdd70667df0753af524e67199f06ad6 Mon Sep 17 00:00:00 2001
From: Dawid
Date: Fri, 14 May 2021 22:48:23 +0200
Subject: [PATCH] change creating datasets

---
Reviewer notes (text between the three-dash line and the diffstat is
ignored by `git am`; it is commentary only):

  * Jenkinsfile: drops the `sh "./data.sh"` step from the
    `withEnv(["CUTOFF=..."])` block. The preceding
    `sh "chmod 777 ./data.sh"` step is kept, so after this patch the
    script is still made executable but is no longer run by this block.
    NOTE(review): confirm whether the chmod / CUTOFF environment is
    still needed by anything else.

  * stats.py: right after the shuffled split into train/validate/test,
    each frame is now written to disk as "train.csv", "validate.csv"
    and "test.csv" in the current working directory.
    NOTE(review): `to_csv` is called without `index=False`, so by
    pandas' default the (shuffled) row index is written as an extra
    first column - confirm that downstream readers expect it.

  * English translation of the Polish comments that appear in the
    stats.py context lines (the lines themselves must stay untouched so
    the hunk still matches the target file):
      "podział danych na train/validate/test (6:2:2) ..."
          -> "split the data into train/validate/test (6:2:2) using
             the numpy and pandas libraries"
      "Wypisanie ilości elementów w poszczególnych ramkach danych"
          -> "print the number of elements in each data frame"

  * NOTE(review): this copy of the patch had lost its line breaks and
    leading whitespace. The line structure below is reconstructed; the
    indentation of the Jenkinsfile context lines is a guess and the
    blank context line at the top of the stats.py hunk is inferred from
    the hunk header counts (-11,6 +11,9). Verify against the target
    files, or apply with `git apply --ignore-space-change`.

 Jenkinsfile | 1 -
 stats.py    | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index ef7d8a4..418a3ce 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -25,7 +25,6 @@ pipeline{
             steps{
                 withEnv(["CUTOFF=${params.CUTOFF}"]) {
                     sh "chmod 777 ./data.sh"
-                    sh "./data.sh"
                 }
             }}
         stage('Archive artifacts'){
diff --git a/stats.py b/stats.py
index 36bfa79..cfdbde0 100644
--- a/stats.py
+++ b/stats.py
@@ -11,6 +11,9 @@ df = pd.read_csv('country_vaccinations.csv')
 
 # podział danych na train/validate/test (6:2:2) za pomocą biblioteki numpy i pandas
 train, validate, test = np.split(df.sample(frac=1), [int(.6*len(df)), int(.8*len(df))])
+train.to_csv("train.csv")
+validate.to_csv("validate.csv")
+test.to_csv("test.csv")
 # Wypisanie ilości elementów w poszczególnych ramkach danych
 print("Whole set size".ljust(20), df.size)
 print("Train set size: ".ljust(20), train.size)