ium_z444510/create-dataset.py

22 lines
583 B
Python
Raw Normal View History

2023-04-20 21:03:45 +02:00
import pandas as pd
import os
import numpy as np
2023-05-11 21:11:50 +02:00
# Number of rows randomly drawn from the loaded dataset before it is split
# into train/dev/test parts (passed to `DataFrame.sample` further down).
cutoff = 10
2023-04-20 21:03:45 +02:00
2023-05-11 22:42:00 +02:00
# Load the source CSV into a DataFrame; the path is relative to the
# current working directory.
data = pd.read_csv(filepath_or_buffer='./data/barcelona_weekends.csv')
2023-04-20 21:03:45 +02:00
# Reduce the dataset to `cutoff` randomly chosen rows. The draw is seeded so
# that repeated runs produce the same subset (the shuffle below is seeded
# too; without a seed here the outputs would still differ on every run).
data = data.sample(cutoff, random_state=42)

# Drop the first column and keep all remaining ones.
data = data.iloc[:, 1:]

# Shuffle once with a fixed seed, then cut at the 60 % and 80 % marks to get
# a 60/20/20 train/dev/test split. Plain positional slicing is used instead
# of `np.split`, which on a DataFrame relies on the deprecated
# `DataFrame.swapaxes`.
shuffled = data.sample(frac=1, random_state=42)
train_end = int(.6 * len(shuffled))
dev_end = int(.8 * len(shuffled))

train_set = shuffled.iloc[:train_end]
dev_set = shuffled.iloc[train_end:dev_end]
test_set = shuffled.iloc[dev_end:]
2023-05-11 22:42:00 +02:00
# Persist each split to its own CSV file, omitting the DataFrame index.
for frame, destination in (
    (train_set, 'data/barcelona_weekends.train.csv'),
    (dev_set, 'data/barcelona_weekends.dev.csv'),
    (test_set, 'data/barcelona_weekends.test.csv'),
):
    frame.to_csv(destination, index=False)
2023-05-11 21:11:50 +02:00
# Sanity check: read back the train split that was just written and show its
# first rows. The path must match the file produced by the `to_csv` call for
# the train set; the previous './train.csv' is not written by this script.
check = pd.read_csv('data/barcelona_weekends.train.csv')
print(check.head())