# ium_434732/split_10.py
# 2021-06-09 17:52:05 +02:00
#
# 29 lines
# 814 B
# Python

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Prepare the heart-failure dataset: load it, drop incomplete rows, rescale
# the float columns to [0, 1], split it into train/valid/test, and write each
# part to its own CSV file in the current working directory.
results = pd.read_csv('heart_failure_clinical_records_dataset.csv')

# Remove rows containing NaN. dropna() returns a new DataFrame rather than
# modifying in place, so the result has to be assigned back.
results = results.dropna()

# Store age as int64; as a side effect it is no longer float64 and is
# therefore skipped by the float-only scaling loop below.
results = results.astype({"age": np.int64})

# Rescale every float64 column to the [0, 1] range.
# NOTE(review): the scaler is fitted on the whole dataset before the split,
# so validation/test values influence the scaling of the training data --
# confirm this is acceptable for the downstream experiments.
for col in results.columns:
    if results[col].dtype == np.float64:
        # MinMaxScaler works on 2-D input: one feature column, one row per sample.
        column_values = results[col].values.reshape(-1, 1)
        scaler = MinMaxScaler(feature_range=(0, 1))
        # Flatten the (n, 1) result back to 1-D before storing it as a column.
        results[col] = scaler.fit_transform(column_values).ravel()

# Split 60% / 20% / 20% (6:2:2): 40% of the rows are held out first, then
# that hold-out is divided evenly between validation and test.
train, holdout = train_test_split(results, test_size=1 - 0.6)
valid, test = train_test_split(holdout, test_size=0.5)

train.to_csv("train.csv", index=False)
valid.to_csv("valid.csv", index=False)
test.to_csv("test.csv", index=False)