25 lines
735 B
Python
25 lines
735 B
Python
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
from sklearn.model_selection import train_test_split
|
||
|
from sklearn.preprocessing import MinMaxScaler
|
||
|
# Load the wine-quality table from the CSV file in the working directory.
wine = pd.read_csv('winequality-red.csv')

# First split: keep 80% of the rows for training and hold out 20%.
# Features are every column except the last; the target is the last column.
# The split is stratified on the "quality" column and seeded for
# reproducibility.
# NOTE(review): the target is selected by position (iloc[:, -1]) while the
# stratification uses the column name "quality" -- this assumes "quality" is
# the last column of the CSV; confirm against the data file.
X_train, X_rem, y_train, y_rem = train_test_split(
    wine.iloc[:, :-1],
    wine.iloc[:, -1],
    test_size=0.2,
    random_state=1,
    stratify=wine["quality"],
)

# Second split: divide the held-out rows in half into validation and test
# sets. Seeded with the same random_state as the first split so that the
# validation/test partition is identical on every run (it was previously
# unseeded and therefore changed between runs).
X_valid, X_test, y_valid, y_test = train_test_split(
    X_rem,
    y_rem,
    test_size=0.5,
    random_state=1,
)
|
||
|
|
||
|
# Report the shape of each feature split. The shapes are printed in the same
# order the label announces (train, test, valid); previously the validation
# shape was printed before the test shape, contradicting the label.
print("Wielkosc danych: train,test,valid:")
print(X_train.shape)
print(X_test.shape)
print(X_valid.shape)

# Summary statistics of the full, unsplit dataset.
print("wine describe:")
print(wine.describe())
|
||
|
|
||
|
# Min-max scaling of the feature splits.
# The scaler is fitted on the training features only, so the test and
# validation features are transformed with parameters learned from the
# training data rather than from their own values.
norm = MinMaxScaler()
norm_fit = norm.fit(X_train)

# Apply the fitted scaler to each split, in the order train, test, valid.
norm_X_train, norm_X_test, norm_X_valid = (
    norm_fit.transform(features) for features in (X_train, X_test, X_valid)
)
|
||
|
|
||
|
|