ium_z487186/main.py

28 lines
1.0 KiB
Python
Raw Normal View History

2023-04-21 10:50:31 +02:00
import pandas as pd
2023-04-21 10:34:30 +02:00
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datasets import load_dataset
# Load the "train" split of the "mstz/liver" dataset and convert it to a
# pandas DataFrame.
dataset = load_dataset("mstz/liver")['train']
dataset = dataset.to_pandas()

# First hold out 20% of all rows as the test set, then hold out 20% of the
# remaining rows as the validation set (64% / 16% / 20% overall).
# random_state is fixed so the splits are reproducible.
train, test = train_test_split(dataset, test_size=0.2, random_state=42)
train, val = train_test_split(train, test_size=0.2, random_state=42)

# train_test_split returns selections of `dataset`; take explicit copies so
# the column assignments below modify independent frames and do not trigger
# pandas' SettingWithCopyWarning.
train = train.copy()
test = test.copy()
val = val.copy()

# Columns to be min-max scaled.
# NOTE(review): the unusual spellings ('direct_ribilubin',
# 'alamine_aminotransferasi', ...) presumably mirror the dataset's own column
# names -- confirm before "correcting" them.
numerical_features = [
    'age', 'total_bilirubin', 'direct_ribilubin', 'alkaline_phosphotase',
    'alamine_aminotransferasi', 'aspartate_aminotransferase', 'total_proteins',
    'albumin', 'albumin_to_globulin_ratio',
]

# Fit the scaler on the training split ONLY, then apply the same fitted
# transformation to the test and validation splits. Re-fitting on each split
# would scale the three sets inconsistently and leak evaluation-set
# statistics into preprocessing. As a consequence, test/val values may fall
# slightly outside [0, 1] when they exceed the range seen in training.
scaler = MinMaxScaler()
train[numerical_features] = scaler.fit_transform(train[numerical_features])
test[numerical_features] = scaler.transform(test[numerical_features])
val[numerical_features] = scaler.transform(val[numerical_features])

# Drop every row that still contains a missing value in any column.
train.dropna(inplace=True)
test.dropna(inplace=True)
val.dropna(inplace=True)
2023-04-21 10:50:31 +02:00
# Write each prepared split to its own CSV file in the current working
# directory, using DataFrame.to_csv with its default settings.
for split_frame, out_path in (
    (train, 'liver.train.data'),
    (test, 'liver.test.data'),
    (val, 'liver.dev.data'),
):
    split_frame.to_csv(out_path)