2023-04-20 21:03:45 +02:00
|
|
|
import pandas as pd
|
|
|
|
import os
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
2023-05-12 00:08:16 +02:00
|
|
|
# Maximum number of rows to keep, taken from the CUTOFF environment variable.
# A missing variable raises KeyError; a non-integer value raises ValueError.
cutoff = int(os.environ['CUTOFF'])

# Load the raw dataset, keep only the first `cutoff` rows and discard the
# first column in a single positional selection.
# NOTE(review): the dropped first column is presumably a saved index column;
# confirm against the CSV.
data = pd.read_csv('./barcelona_weekends.csv').iloc[:cutoff, 1:]
|
|
|
|
|
|
|
|
# Shuffle every row once with a fixed seed so the split is reproducible, then
# cut the shuffled frame at 60% and 80% of its length to obtain a 60/20/20
# train/dev/test split.
#
# Positional slicing produces the same three pieces as
# np.split(shuffled, [train_end, dev_end]) but keeps the operation inside
# pandas: np.split on a DataFrame goes through DataFrame.swapaxes, which
# pandas has deprecated.
shuffled = data.sample(frac=1, random_state=42)
train_end = int(.6 * len(data))
dev_end = int(.8 * len(data))

train_set = shuffled.iloc[:train_end]
dev_set = shuffled.iloc[train_end:dev_end]
test_set = shuffled.iloc[dev_end:]
|
|
|
|
|
2023-05-11 23:28:25 +02:00
|
|
|
# Write each partition to its own CSV file, leaving the row index out.
outputs = (
    ('barcelona_weekends.train.csv', train_set),
    ('barcelona_weekends.dev.csv', dev_set),
    ('barcelona_weekends.test.csv', test_set),
)
for out_path, subset in outputs:
    subset.to_csv(out_path, index=False)
|
2023-05-11 21:11:50 +02:00
|
|
|
|
|
|
|
|
2023-05-11 23:28:25 +02:00
|
|
|
# Sanity check: read the written test split back from disk and show its
# first rows.
check = pd.read_csv('./barcelona_weekends.test.csv')
preview = check.head()
print(preview)
|