ium_452639/split_train_valid_test.py

14 lines
433 B
Python
Raw Normal View History

2023-03-21 01:00:51 +01:00
import pandas as pd
from sklearn.model_selection import train_test_split
# Split the normalized stop-times table into train / validation / test files.
#
# Both sizes are integers, so scikit-learn interprets them as absolute row
# counts (not fractions or percentages): the test and validation sets get
# TEST_SIZE and VALID_SIZE rows respectively, and every remaining row goes to
# the training set.
TEST_SIZE = 25
VALID_SIZE = 25
# Fixed seed so that re-running the script yields the same split.
RANDOM_STATE = 42

data = pd.read_csv('./stop_times.normalized.tsv', sep='\t')

# Stage 1: carve the combined hold-out rows (validation + test) out of the
# full table; whatever is left is the training set.
train, holdout = train_test_split(
    data,
    test_size=TEST_SIZE + VALID_SIZE,
    random_state=RANDOM_STATE,
)
# Stage 2: divide the hold-out rows into the validation and test sets.
valid, test = train_test_split(
    holdout,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# NOTE: the DataFrame index is written as the first (unnamed) column because
# that is the pandas default; it is kept so the output layout stays the same
# for anything that already reads these files.
train.to_csv('stop_times.train.tsv', sep='\t')
test.to_csv('stop_times.test.tsv', sep='\t')
valid.to_csv('stop_times.valid.tsv', sep='\t')