# Source: ium_444354/data_processing.py
# Repository web-view header (not part of the program): 25 lines, 737 B, Python.
# Blame timestamp of the original revision: 2022-04-03 22:17:18 +02:00
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Load the red-wine quality table from the working directory.
wine = pd.read_csv('winequality-red.csv')

# Features are every column except the last; the target is the last column.
# NOTE(review): the stratification key is wine["quality"], so this presumably
# expects "quality" to be the last column -- confirm against the CSV.
# First split: 80% train, 20% held out, stratified on the quality label.
X_train, X_rem, y_train, y_rem = train_test_split(
    wine.iloc[:, :-1],
    wine.iloc[:, -1],
    test_size=0.2,
    random_state=1,
    stratify=wine["quality"],
)

# Second split: halve the held-out 20% into validation and test (10% each).
# The seed is fixed here as well so the valid/test partition is reproducible
# across runs, like the train split above.
# NOTE(review): this split is not stratified; rare quality classes in the
# hold-out may have too few rows for stratification -- confirm before adding.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_rem,
    y_rem,
    test_size=0.5,
    random_state=1,
)

# Report the split sizes in the same order the label announces them
# (train, test, valid).
print("Wielkosc danych: train, test, valid:")
print(X_train.shape)
print(X_test.shape)
print(X_valid.shape)

print("wine describe:")
print(wine.describe())

# Fit the min-max scaler on the training features only, then apply that same
# fitted scaler to every split so test/valid statistics never influence it.
norm = MinMaxScaler()
norm_fit = norm.fit(X_train)
norm_X_train = norm_fit.transform(X_train)
norm_X_test = norm_fit.transform(X_test)
norm_X_valid = norm_fit.transform(X_valid)