diff --git a/ium-data.py b/ium-data.py index 53717d7..d5ad1a4 100755 --- a/ium-data.py +++ b/ium-data.py @@ -42,6 +42,11 @@ disney['Review_Text'] = disney['Review_Text'].str.lower() d_train, d_remainder = train_test_split(disney, test_size=0.2, random_state=1, stratify=disney["Branch"]) d_dev, d_test = train_test_split(d_remainder, test_size=0.5, random_state=1, stratify=d_remainder["Branch"]) +# Zapis do plików +d_train.to_csv('d_train.csv', index=False) +d_test.to_csv('d_test.csv', index=False) +d_dev.to_csv('d_dev.csv', index=False) + # Statystyki print(f"Wielkość całego zbioru: {disney.shape[0]}\n" f"Inne statystyki:") @@ -69,4 +74,4 @@ try: plt.suptitle('Rozkład ocen w całym zbiorze') plt.show() except: - print("Error drawing hist plot (Powinno działać w Pycharmie)") \ No newline at end of file + print("Error drawing hist plot (Powinno działać w Pycharmie)")