ium_452639/split_train_valid_test.py

13 lines
402 B
Python
Raw Normal View History

2023-03-21 01:56:37 +01:00
#!/usr/bin/env python3
2023-03-21 01:00:51 +01:00
import pandas as pd
from sklearn.model_selection import train_test_split
# Read the normalized stop-times table; the input file is tab-separated.
data = pd.read_csv(
    './stop_times.normalized.tsv',
    sep='\t',
)
2023-03-21 01:56:37 +01:00
# Split the rows roughly 50/25/25 into train/valid/test: half of the data
# is held out first, then that holdout is halved into valid and test.
# A fixed seed makes the shuffle deterministic, so rerunning the script
# regenerates exactly the same three subsets.
SPLIT_SEED = 42
train, holdout = train_test_split(data, test_size=0.5, random_state=SPLIT_SEED)
valid, test = train_test_split(holdout, test_size=0.5, random_state=SPLIT_SEED)
2023-03-21 01:00:51 +01:00
# Write each subset to its own tab-separated file in the working directory.
# to_csv is called without index=..., so the DataFrame index is written
# along with the data columns (pandas default).
for subset, out_path in (
    (train, 'stop_times.train.tsv'),
    (test, 'stop_times.test.tsv'),
    (valid, 'stop_times.valid.tsv'),
):
    subset.to_csv(out_path, sep='\t')