46 KiB
46 KiB
Potrzebne importy
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
Odczytywanie danych
# Load the body-performance dataset from a CSV file in the working directory.
df = pd.read_csv('bodyPerformance.csv')
# Preview the first rows to check that the columns were parsed as expected.
df.head()
age | gender | height_cm | weight_kg | body fat_% | diastolic | systolic | gripForce | sit and bend forward_cm | sit-ups counts | broad jump_cm | class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 27.0 | M | 172.3 | 75.24 | 21.3 | 80.0 | 130.0 | 54.9 | 18.4 | 60.0 | 217.0 | C |
1 | 25.0 | M | 165.0 | 55.80 | 15.7 | 77.0 | 126.0 | 36.4 | 16.3 | 53.0 | 229.0 | A |
2 | 31.0 | M | 179.6 | 78.00 | 20.1 | 92.0 | 152.0 | 44.8 | 12.0 | 49.0 | 181.0 | C |
3 | 32.0 | M | 174.5 | 71.10 | 18.4 | 76.0 | 147.0 | 41.4 | 15.2 | 53.0 | 219.0 | B |
4 | 28.0 | M | 173.8 | 67.70 | 17.1 | 70.0 | 127.0 | 43.5 | 27.1 | 45.0 | 217.0 | B |
# Print each column's dtype and non-null count.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 13393 entries, 0 to 13392 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 age 13393 non-null float64 1 gender 13393 non-null object 2 height_cm 13393 non-null float64 3 weight_kg 13393 non-null float64 4 body fat_% 13393 non-null float64 5 diastolic 13393 non-null float64 6 systolic 13393 non-null float64 7 gripForce 13393 non-null float64 8 sit and bend forward_cm 13393 non-null float64 9 sit-ups counts 13393 non-null float64 10 broad jump_cm 13393 non-null float64 11 class 13393 non-null object dtypes: float64(10), object(2) memory usage: 1.2+ MB
Przygotowanie danych
# Count missing values in every column.
df.isna().sum()
age 0 gender 0 height_cm 0 weight_kg 0 body fat_% 0 diastolic 0 systolic 0 gripForce 0 sit and bend forward_cm 0 sit-ups counts 0 broad jump_cm 0 class 0 dtype: int64
# Summarise the categorical gender column (count, distinct values, most frequent value).
df['gender'].describe()
count 13393 unique 2 top M freq 8467 Name: gender, dtype: object
# One-hot encode gender into the indicator columns gender_F and gender_M.
# NOTE(review): gender has only two distinct values, so the two indicators are
# complementary and one of them is redundant. Dropping one would change the
# feature count, which the network's hard-coded input shape further down
# depends on.
df = pd.get_dummies(df, columns=['gender'])
df.head()
age | height_cm | weight_kg | body fat_% | diastolic | systolic | gripForce | sit and bend forward_cm | sit-ups counts | broad jump_cm | class | gender_F | gender_M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 27.0 | 172.3 | 75.24 | 21.3 | 80.0 | 130.0 | 54.9 | 18.4 | 60.0 | 217.0 | C | False | True |
1 | 25.0 | 165.0 | 55.80 | 15.7 | 77.0 | 126.0 | 36.4 | 16.3 | 53.0 | 229.0 | A | False | True |
2 | 31.0 | 179.6 | 78.00 | 20.1 | 92.0 | 152.0 | 44.8 | 12.0 | 49.0 | 181.0 | C | False | True |
3 | 32.0 | 174.5 | 71.10 | 18.4 | 76.0 | 147.0 | 41.4 | 15.2 | 53.0 | 219.0 | B | False | True |
4 | 28.0 | 173.8 | 67.70 | 17.1 | 70.0 | 127.0 | 43.5 | 27.1 | 45.0 | 217.0 | B | False | True |
# Turn both one-hot gender indicators from booleans into 0/1 integers.
for indicator in ('gender_F', 'gender_M'):
    df[indicator] = df[indicator].astype('int64')
df.head()
age | height_cm | weight_kg | body fat_% | diastolic | systolic | gripForce | sit and bend forward_cm | sit-ups counts | broad jump_cm | class | gender_F | gender_M | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 27.0 | 172.3 | 75.24 | 21.3 | 80.0 | 130.0 | 54.9 | 18.4 | 60.0 | 217.0 | C | 0 | 1 |
1 | 25.0 | 165.0 | 55.80 | 15.7 | 77.0 | 126.0 | 36.4 | 16.3 | 53.0 | 229.0 | A | 0 | 1 |
2 | 31.0 | 179.6 | 78.00 | 20.1 | 92.0 | 152.0 | 44.8 | 12.0 | 49.0 | 181.0 | C | 0 | 1 |
3 | 32.0 | 174.5 | 71.10 | 18.4 | 76.0 | 147.0 | 41.4 | 15.2 | 53.0 | 219.0 | B | 0 | 1 |
4 | 28.0 | 173.8 | 67.70 | 17.1 | 70.0 | 127.0 | 43.5 | 27.1 | 45.0 | 217.0 | B | 0 | 1 |
Podział na zbiory uczące i testowe oraz skalowanie
# Separate the target column from the feature columns.
X = df.drop(columns=['class'])
y = df['class']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same transform to
# both subsets so no test-set statistics are used during training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# Number of rows in the training subset.
X_train.shape[0]
10714
# Number of rows in the test subset.
X_test.shape[0]
2679
Regresja logistyczna
# Baseline classifier: logistic regression with default hyper-parameters,
# trained on the standardised training features.
logistic_model = LogisticRegression().fit(X_train, y_train)
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()
# Score the logistic-regression model on the held-out test rows.
logistic_predicts = logistic_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=logistic_predicts)
cr = classification_report(y_true=y_test, y_pred=logistic_predicts)
# Per-class precision/recall/F1 first, then the overall accuracy.
print(cr)
print('accuracy: ', accuracy)
precision recall f1-score support A 0.70 0.71 0.71 685 B 0.45 0.44 0.45 662 C 0.52 0.53 0.53 650 D 0.79 0.78 0.78 682 accuracy 0.62 2679 macro avg 0.62 0.62 0.62 2679 weighted avg 0.62 0.62 0.62 2679 accuracy: 0.6188876446435237
Naiwny klasyfikator Bayesa
# Gaussian naive Bayes trained on the standardised training features.
bayes_model = GaussianNB()
bayes_model.fit(X_train, y_train)

# Evaluate on the held-out test rows.
bayes_predicts = bayes_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=bayes_predicts)
report = classification_report(y_true=y_test, y_pred=bayes_predicts)
print(report)
print('accuracy: ', accuracy)
precision recall f1-score support A 0.59 0.74 0.66 685 B 0.41 0.30 0.34 662 C 0.46 0.46 0.46 650 D 0.68 0.69 0.68 682 accuracy 0.55 2679 macro avg 0.53 0.55 0.54 2679 weighted avg 0.54 0.55 0.54 2679 accuracy: 0.5487122060470325
Klasyfikator najbliższych sąsiadów
# k-nearest-neighbours classifier that votes among the 8 closest training rows.
KNN = KNeighborsClassifier(n_neighbors=8)
KNN.fit(X_train, y_train)

# Evaluate on the held-out test rows.
knn_predicts = KNN.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=knn_predicts)
report = classification_report(y_true=y_test, y_pred=knn_predicts)
print(report)
print('accuracy: ', accuracy)
precision recall f1-score support A 0.62 0.82 0.71 685 B 0.43 0.47 0.45 662 C 0.56 0.51 0.54 650 D 0.91 0.64 0.75 682 accuracy 0.61 2679 macro avg 0.63 0.61 0.61 2679 weighted avg 0.64 0.61 0.61 2679 accuracy: 0.6114221724524076
Drzewo decyzyjne
# Encode the string class labels as consecutive integers. y_train / y_test
# stay integer-encoded from here on; the network further down is compiled with
# sparse_categorical_crossentropy and is fit on these encoded labels.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

# Original label names, so the report reads A-D like the earlier reports
# instead of 0-3. str() keeps this working if the cell is re-run on labels
# that have already been encoded to integers.
class_names = [str(label) for label in label_encoder.classes_]

# A fixed seed makes the fitted tree (and therefore the scores) reproducible,
# in line with the seeded train/test split.
tree_model = DecisionTreeClassifier(max_depth=9, random_state=42).fit(X_train, y_train)
tree_predicts = tree_model.predict(X_test)
report = classification_report(y_test, tree_predicts, target_names=class_names)
accuracy = accuracy_score(y_test, tree_predicts)
print(report)
print('accuracy: ',accuracy)
precision recall f1-score support 0 0.69 0.82 0.75 685 1 0.54 0.57 0.56 662 2 0.66 0.59 0.62 650 3 0.88 0.76 0.81 682 accuracy 0.69 2679 macro avg 0.69 0.69 0.69 2679 weighted avg 0.70 0.69 0.69 2679 accuracy: 0.6875699888017918
Sieć neuronowa
# Fully connected classifier: three hidden ReLU layers of 128 units each and a
# 4-unit softmax output (one unit per class).
# The input width is read from the training matrix rather than hard-coded, so
# the model keeps matching the data if the set of feature columns changes.
n_features = X_train.shape[1]
model = keras.Sequential(
    [
        keras.Input(shape=(n_features,)),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(4, activation="softmax"),
    ]
)
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense (Dense) (None, 128) 1664 dense_1 (Dense) (None, 128) 16512 dense_2 (Dense) (None, 128) 16512 dense_3 (Dense) (None, 4) 516 ================================================================= Total params: 35,204 Trainable params: 35,204 Non-trainable params: 0 _________________________________________________________________
# The targets are integer class indices, hence the sparse variant of
# categorical cross-entropy.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training for the full 50 epochs overfits: validation loss stops improving
# well before the end while training loss keeps falling. Stop once validation
# loss has not improved for 10 epochs and roll back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True
)

# validation_split=0.1 sets aside part of the training data for validation.
model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_split=0.1,
    callbacks=[early_stopping],
)
Epoch 1/50 76/76 [==============================] - 1s 7ms/step - loss: 0.9884 - accuracy: 0.5528 - val_loss: 0.8428 - val_accuracy: 0.6287 Epoch 2/50 76/76 [==============================] - 0s 3ms/step - loss: 0.8034 - accuracy: 0.6459 - val_loss: 0.7855 - val_accuracy: 0.6670 Epoch 3/50 76/76 [==============================] - 0s 3ms/step - loss: 0.7376 - accuracy: 0.6876 - val_loss: 0.7341 - val_accuracy: 0.6931 Epoch 4/50 76/76 [==============================] - 0s 3ms/step - loss: 0.6971 - accuracy: 0.7119 - val_loss: 0.7143 - val_accuracy: 0.7146 Epoch 5/50 76/76 [==============================] - 0s 3ms/step - loss: 0.6715 - accuracy: 0.7206 - val_loss: 0.6984 - val_accuracy: 0.7201 Epoch 6/50 76/76 [==============================] - 0s 2ms/step - loss: 0.6511 - accuracy: 0.7301 - val_loss: 0.6901 - val_accuracy: 0.7164 Epoch 7/50 76/76 [==============================] - 0s 3ms/step - loss: 0.6375 - accuracy: 0.7359 - val_loss: 0.6782 - val_accuracy: 0.7285 Epoch 8/50 76/76 [==============================] - 0s 3ms/step - loss: 0.6239 - accuracy: 0.7410 - val_loss: 0.7001 - val_accuracy: 0.7062 Epoch 9/50 76/76 [==============================] - 0s 3ms/step - loss: 0.6144 - accuracy: 0.7483 - val_loss: 0.6578 - val_accuracy: 0.7304 Epoch 10/50 76/76 [==============================] - 0s 3ms/step - loss: 0.6059 - accuracy: 0.7492 - val_loss: 0.6606 - val_accuracy: 0.7183 Epoch 11/50 76/76 [==============================] - 0s 3ms/step - loss: 0.6000 - accuracy: 0.7541 - val_loss: 0.6597 - val_accuracy: 0.7341 Epoch 12/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5942 - accuracy: 0.7582 - val_loss: 0.6617 - val_accuracy: 0.7183 Epoch 13/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5856 - accuracy: 0.7586 - val_loss: 0.6732 - val_accuracy: 0.7285 Epoch 14/50 76/76 [==============================] - 0s 5ms/step - loss: 0.5807 - accuracy: 0.7627 - val_loss: 0.6709 - val_accuracy: 0.7369 Epoch 15/50 76/76 
[==============================] - 0s 3ms/step - loss: 0.5731 - accuracy: 0.7659 - val_loss: 0.6618 - val_accuracy: 0.7416 Epoch 16/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5665 - accuracy: 0.7694 - val_loss: 0.6483 - val_accuracy: 0.7463 Epoch 17/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5620 - accuracy: 0.7694 - val_loss: 0.6635 - val_accuracy: 0.7220 Epoch 18/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5555 - accuracy: 0.7709 - val_loss: 0.6493 - val_accuracy: 0.7425 Epoch 19/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5523 - accuracy: 0.7715 - val_loss: 0.6649 - val_accuracy: 0.7313 Epoch 20/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5489 - accuracy: 0.7771 - val_loss: 0.6862 - val_accuracy: 0.7164 Epoch 21/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5409 - accuracy: 0.7801 - val_loss: 0.6567 - val_accuracy: 0.7435 Epoch 22/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5373 - accuracy: 0.7784 - val_loss: 0.6638 - val_accuracy: 0.7285 Epoch 23/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5316 - accuracy: 0.7838 - val_loss: 0.6582 - val_accuracy: 0.7407 Epoch 24/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5308 - accuracy: 0.7857 - val_loss: 0.6762 - val_accuracy: 0.7285 Epoch 25/50 76/76 [==============================] - 0s 2ms/step - loss: 0.5279 - accuracy: 0.7844 - val_loss: 0.6775 - val_accuracy: 0.7229 Epoch 26/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5165 - accuracy: 0.7884 - val_loss: 0.6599 - val_accuracy: 0.7257 Epoch 27/50 76/76 [==============================] - 0s 2ms/step - loss: 0.5186 - accuracy: 0.7899 - val_loss: 0.6610 - val_accuracy: 0.7369 Epoch 28/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5117 - accuracy: 0.7903 - val_loss: 0.6590 - val_accuracy: 0.7388 Epoch 29/50 76/76 
[==============================] - 0s 3ms/step - loss: 0.5162 - accuracy: 0.7883 - val_loss: 0.6800 - val_accuracy: 0.7211 Epoch 30/50 76/76 [==============================] - 0s 3ms/step - loss: 0.5048 - accuracy: 0.7945 - val_loss: 0.6595 - val_accuracy: 0.7379 Epoch 31/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4991 - accuracy: 0.7955 - val_loss: 0.6911 - val_accuracy: 0.7146 Epoch 32/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4927 - accuracy: 0.8012 - val_loss: 0.6630 - val_accuracy: 0.7332 Epoch 33/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4943 - accuracy: 0.8029 - val_loss: 0.6822 - val_accuracy: 0.7285 Epoch 34/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4825 - accuracy: 0.8061 - val_loss: 0.6837 - val_accuracy: 0.7285 Epoch 35/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4813 - accuracy: 0.8067 - val_loss: 0.6821 - val_accuracy: 0.7267 Epoch 36/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4830 - accuracy: 0.8034 - val_loss: 0.6730 - val_accuracy: 0.7379 Epoch 37/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4725 - accuracy: 0.8099 - val_loss: 0.7114 - val_accuracy: 0.7295 Epoch 38/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4729 - accuracy: 0.8085 - val_loss: 0.7065 - val_accuracy: 0.7183 Epoch 39/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4679 - accuracy: 0.8136 - val_loss: 0.6859 - val_accuracy: 0.7248 Epoch 40/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4683 - accuracy: 0.8111 - val_loss: 0.7185 - val_accuracy: 0.7108 Epoch 41/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4571 - accuracy: 0.8174 - val_loss: 0.7018 - val_accuracy: 0.7276 Epoch 42/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4459 - accuracy: 0.8218 - val_loss: 0.7098 - val_accuracy: 0.7127 Epoch 43/50 76/76 
[==============================] - 0s 3ms/step - loss: 0.4492 - accuracy: 0.8227 - val_loss: 0.7279 - val_accuracy: 0.7248 Epoch 44/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4493 - accuracy: 0.8253 - val_loss: 0.7087 - val_accuracy: 0.7211 Epoch 45/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4405 - accuracy: 0.8242 - val_loss: 0.7006 - val_accuracy: 0.7155 Epoch 46/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4327 - accuracy: 0.8279 - val_loss: 0.7278 - val_accuracy: 0.7155 Epoch 47/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4324 - accuracy: 0.8272 - val_loss: 0.7120 - val_accuracy: 0.7267 Epoch 48/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4275 - accuracy: 0.8313 - val_loss: 0.7302 - val_accuracy: 0.7090 Epoch 49/50 76/76 [==============================] - 0s 4ms/step - loss: 0.4230 - accuracy: 0.8335 - val_loss: 0.7484 - val_accuracy: 0.7183 Epoch 50/50 76/76 [==============================] - 0s 3ms/step - loss: 0.4182 - accuracy: 0.8354 - val_loss: 0.7354 - val_accuracy: 0.7164
<keras.callbacks.History at 0x260d1488370>
# Run the network on the test rows; each output row holds the softmax scores
# of the 4 output units.
neural_predicts = model.predict(X_test)
84/84 [==============================] - 0s 1ms/step
# Take the index of the highest softmax score as the predicted class.
neural_predicts_labels = np.argmax(neural_predicts, axis=1)

# Report with the original label names (A-D) instead of the encoded 0-3, so
# this report is directly comparable with the earlier models' reports.
# str() guards against classes_ holding integers after a re-run of the
# encoding cell.
class_names = [str(label) for label in label_encoder.classes_]
report = classification_report(y_test, neural_predicts_labels, target_names=class_names)
accuracy = accuracy_score(y_test, neural_predicts_labels)
print(report)
print('accuracy: ',accuracy)
precision recall f1-score support 0 0.75 0.84 0.79 685 1 0.60 0.65 0.62 662 2 0.72 0.62 0.67 650 3 0.89 0.84 0.86 682 accuracy 0.74 2679 macro avg 0.74 0.74 0.74 2679 weighted avg 0.74 0.74 0.74 2679 accuracy: 0.7375886524822695