# Origin: commit 1afa0cf50eb85d796f16ab7b08b1c525715e75ab by piotrwrzodak,
# Thu, 20 Apr 2023 21:03:45 +0200 -- "[PATCH] add create-dataset python file"
# (new file: create-dataset.py).
"""Build train/dev/test CSV files from the Barcelona weekends dataset.

The script reads the source CSV, keeps a random subsample of ``CUTOFF`` rows
(``CUTOFF`` is a required environment variable holding an integer), drops the
first column, shuffles the rows and writes a 60/20/20 split to ``train.csv``,
``dev.csv`` and ``test.csv`` in the current working directory.
"""
import os

import numpy as np  # noqa: F401  (retained from the original script)
import pandas as pd

SOURCE_CSV = './ium_z444510/barcelona_weekends.csv'

# One seed drives both the subsample and the shuffle so that a given CUTOFF
# always yields the same three files.
SEED = 42

# Cumulative fractions at which the shuffled rows are cut: rows before
# TRAIN_END go to train, rows between TRAIN_END and DEV_END go to dev, and
# the remainder goes to test.
TRAIN_END = .6
DEV_END = .8


def split_dataset(data, seed=SEED):
    """Shuffle ``data`` and cut it into train, dev and test partitions.

    Args:
        data: DataFrame to split; it is not modified.
        seed: Random state passed to ``DataFrame.sample`` for the shuffle.

    Returns:
        A ``(train, dev, test)`` tuple of DataFrames that together contain
        every row of ``data`` exactly once.
    """
    shuffled = data.sample(frac=1, random_state=seed)
    row_count = len(shuffled)
    train_stop = int(TRAIN_END * row_count)
    dev_stop = int(DEV_END * row_count)
    # Plain positional slicing instead of np.split: np.split on a DataFrame
    # goes through the deprecated DataFrame.swapaxes.
    return (
        shuffled.iloc[:train_stop],
        shuffled.iloc[train_stop:dev_stop],
        shuffled.iloc[dev_stop:],
    )


def main():
    """Read the source CSV, subsample it and write the three split files.

    Raises:
        KeyError: If the ``CUTOFF`` environment variable is not set.
        ValueError: If ``CUTOFF`` is not an integer, or (raised by pandas) if
            it exceeds the number of rows in the source file.
    """
    cutoff = int(os.environ['CUTOFF'])

    data = pd.read_csv(SOURCE_CSV)
    # Seeded so the chosen rows do not change between runs.
    data = data.sample(cutoff, random_state=SEED)
    # Drop the first column (presumably a saved index column -- confirm
    # against the CSV).
    data = data.iloc[:, 1:]

    train_set, dev_set, test_set = split_dataset(data)

    train_set.to_csv('train.csv', index=False)
    dev_set.to_csv('dev.csv', index=False)
    test_set.to_csv('test.csv', index=False)


if __name__ == '__main__':
    main()