Uczenie_maszynowe_projekt/Projekt.ipynb
2024-01-27 23:57:54 +01:00

1317 lines
46 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "54f20002-11a1-4a11-88c4-f3b5bef89a08",
"metadata": {},
"source": [
"Potrzebne importy"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0318fb7f-f6df-4d0d-9d92-61e17793e31a",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"import numpy as np\n",
"from sklearn.metrics import classification_report, accuracy_score \n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.preprocessing import StandardScaler, LabelEncoder"
]
},
{
"cell_type": "markdown",
"id": "2f3e1deb-f7f1-4108-8662-7ad1dc6d6485",
"metadata": {},
"source": [
"Odczytywanie danych"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "522d753b-fb82-4b9b-8d2d-20c7089d1b9c",
"metadata": {},
"outputs": [],
"source": [
"# Load the dataset; the path is resolved relative to the working directory.\n",
"csv_path = 'bodyPerformance.csv'\n",
"df = pd.read_csv(csv_path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a75cd10b-f1de-460f-b4e3-0b99502d2d11",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>gender</th>\n",
" <th>height_cm</th>\n",
" <th>weight_kg</th>\n",
" <th>body fat_%</th>\n",
" <th>diastolic</th>\n",
" <th>systolic</th>\n",
" <th>gripForce</th>\n",
" <th>sit and bend forward_cm</th>\n",
" <th>sit-ups counts</th>\n",
" <th>broad jump_cm</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>27.0</td>\n",
" <td>M</td>\n",
" <td>172.3</td>\n",
" <td>75.24</td>\n",
" <td>21.3</td>\n",
" <td>80.0</td>\n",
" <td>130.0</td>\n",
" <td>54.9</td>\n",
" <td>18.4</td>\n",
" <td>60.0</td>\n",
" <td>217.0</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25.0</td>\n",
" <td>M</td>\n",
" <td>165.0</td>\n",
" <td>55.80</td>\n",
" <td>15.7</td>\n",
" <td>77.0</td>\n",
" <td>126.0</td>\n",
" <td>36.4</td>\n",
" <td>16.3</td>\n",
" <td>53.0</td>\n",
" <td>229.0</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>31.0</td>\n",
" <td>M</td>\n",
" <td>179.6</td>\n",
" <td>78.00</td>\n",
" <td>20.1</td>\n",
" <td>92.0</td>\n",
" <td>152.0</td>\n",
" <td>44.8</td>\n",
" <td>12.0</td>\n",
" <td>49.0</td>\n",
" <td>181.0</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>32.0</td>\n",
" <td>M</td>\n",
" <td>174.5</td>\n",
" <td>71.10</td>\n",
" <td>18.4</td>\n",
" <td>76.0</td>\n",
" <td>147.0</td>\n",
" <td>41.4</td>\n",
" <td>15.2</td>\n",
" <td>53.0</td>\n",
" <td>219.0</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28.0</td>\n",
" <td>M</td>\n",
" <td>173.8</td>\n",
" <td>67.70</td>\n",
" <td>17.1</td>\n",
" <td>70.0</td>\n",
" <td>127.0</td>\n",
" <td>43.5</td>\n",
" <td>27.1</td>\n",
" <td>45.0</td>\n",
" <td>217.0</td>\n",
" <td>B</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age gender height_cm weight_kg body fat_% diastolic systolic \\\n",
"0 27.0 M 172.3 75.24 21.3 80.0 130.0 \n",
"1 25.0 M 165.0 55.80 15.7 77.0 126.0 \n",
"2 31.0 M 179.6 78.00 20.1 92.0 152.0 \n",
"3 32.0 M 174.5 71.10 18.4 76.0 147.0 \n",
"4 28.0 M 173.8 67.70 17.1 70.0 127.0 \n",
"\n",
" gripForce sit and bend forward_cm sit-ups counts broad jump_cm class \n",
"0 54.9 18.4 60.0 217.0 C \n",
"1 36.4 16.3 53.0 229.0 A \n",
"2 44.8 12.0 49.0 181.0 C \n",
"3 41.4 15.2 53.0 219.0 B \n",
"4 43.5 27.1 45.0 217.0 B "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at the first five rows to check that the columns parsed as expected.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8d218c58-3d86-4007-9ec4-253b51f5e003",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 13393 entries, 0 to 13392\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 age 13393 non-null float64\n",
" 1 gender 13393 non-null object \n",
" 2 height_cm 13393 non-null float64\n",
" 3 weight_kg 13393 non-null float64\n",
" 4 body fat_% 13393 non-null float64\n",
" 5 diastolic 13393 non-null float64\n",
" 6 systolic 13393 non-null float64\n",
" 7 gripForce 13393 non-null float64\n",
" 8 sit and bend forward_cm 13393 non-null float64\n",
" 9 sit-ups counts 13393 non-null float64\n",
" 10 broad jump_cm 13393 non-null float64\n",
" 11 class 13393 non-null object \n",
"dtypes: float64(10), object(2)\n",
"memory usage: 1.2+ MB\n"
]
}
],
"source": [
"# Column dtypes, non-null counts and memory footprint of the loaded frame.\n",
"df.info()"
]
},
{
"cell_type": "markdown",
"id": "ce463e56-68c0-4a09-a452-d3b50e836eb7",
"metadata": {},
"source": [
"Przygotowanie danych"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1565dcf0-7fb0-4a56-902c-aabd3a5739a4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"age 0\n",
"gender 0\n",
"height_cm 0\n",
"weight_kg 0\n",
"body fat_% 0\n",
"diastolic 0\n",
"systolic 0\n",
"gripForce 0\n",
"sit and bend forward_cm 0\n",
"sit-ups counts 0\n",
"broad jump_cm 0\n",
"class 0\n",
"dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of missing values in each column.\n",
"df.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "23c1fa61-45b3-45d6-9f7a-af4eaca34501",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 13393\n",
"unique 2\n",
"top M\n",
"freq 8467\n",
"Name: gender, dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary of the only categorical feature before it is encoded.\n",
"df.loc[:, 'gender'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c52e6cd1-7175-4a54-bfb1-865494ce1eae",
"metadata": {},
"outputs": [],
"source": [
"# One-hot encode the categorical gender column into gender_F / gender_M.\n",
"categorical_columns = ['gender']\n",
"df = pd.get_dummies(df, columns=categorical_columns)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9236fdb8-feb6-4ad4-a5ed-9262ae76b66c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>height_cm</th>\n",
" <th>weight_kg</th>\n",
" <th>body fat_%</th>\n",
" <th>diastolic</th>\n",
" <th>systolic</th>\n",
" <th>gripForce</th>\n",
" <th>sit and bend forward_cm</th>\n",
" <th>sit-ups counts</th>\n",
" <th>broad jump_cm</th>\n",
" <th>class</th>\n",
" <th>gender_F</th>\n",
" <th>gender_M</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>27.0</td>\n",
" <td>172.3</td>\n",
" <td>75.24</td>\n",
" <td>21.3</td>\n",
" <td>80.0</td>\n",
" <td>130.0</td>\n",
" <td>54.9</td>\n",
" <td>18.4</td>\n",
" <td>60.0</td>\n",
" <td>217.0</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25.0</td>\n",
" <td>165.0</td>\n",
" <td>55.80</td>\n",
" <td>15.7</td>\n",
" <td>77.0</td>\n",
" <td>126.0</td>\n",
" <td>36.4</td>\n",
" <td>16.3</td>\n",
" <td>53.0</td>\n",
" <td>229.0</td>\n",
" <td>A</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>31.0</td>\n",
" <td>179.6</td>\n",
" <td>78.00</td>\n",
" <td>20.1</td>\n",
" <td>92.0</td>\n",
" <td>152.0</td>\n",
" <td>44.8</td>\n",
" <td>12.0</td>\n",
" <td>49.0</td>\n",
" <td>181.0</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>32.0</td>\n",
" <td>174.5</td>\n",
" <td>71.10</td>\n",
" <td>18.4</td>\n",
" <td>76.0</td>\n",
" <td>147.0</td>\n",
" <td>41.4</td>\n",
" <td>15.2</td>\n",
" <td>53.0</td>\n",
" <td>219.0</td>\n",
" <td>B</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28.0</td>\n",
" <td>173.8</td>\n",
" <td>67.70</td>\n",
" <td>17.1</td>\n",
" <td>70.0</td>\n",
" <td>127.0</td>\n",
" <td>43.5</td>\n",
" <td>27.1</td>\n",
" <td>45.0</td>\n",
" <td>217.0</td>\n",
" <td>B</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age height_cm weight_kg body fat_% diastolic systolic gripForce \\\n",
"0 27.0 172.3 75.24 21.3 80.0 130.0 54.9 \n",
"1 25.0 165.0 55.80 15.7 77.0 126.0 36.4 \n",
"2 31.0 179.6 78.00 20.1 92.0 152.0 44.8 \n",
"3 32.0 174.5 71.10 18.4 76.0 147.0 41.4 \n",
"4 28.0 173.8 67.70 17.1 70.0 127.0 43.5 \n",
"\n",
" sit and bend forward_cm sit-ups counts broad jump_cm class gender_F \\\n",
"0 18.4 60.0 217.0 C False \n",
"1 16.3 53.0 229.0 A False \n",
"2 12.0 49.0 181.0 C False \n",
"3 15.2 53.0 219.0 B False \n",
"4 27.1 45.0 217.0 B False \n",
"\n",
" gender_M \n",
"0 True \n",
"1 True \n",
"2 True \n",
"3 True \n",
"4 True "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Confirm that the gender_F / gender_M indicator columns were appended.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "63fdc3e1-2388-4f5b-9a11-5f87c88a28e0",
"metadata": {},
"outputs": [],
"source": [
"# Turn the gender indicator columns into plain 0/1 integers.\n",
"for flag_column in ('gender_F', 'gender_M'):\n",
"    df[flag_column] = df[flag_column].astype('int64')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "91f4116a-45f7-4d49-8d88-d8b4c3173019",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>height_cm</th>\n",
" <th>weight_kg</th>\n",
" <th>body fat_%</th>\n",
" <th>diastolic</th>\n",
" <th>systolic</th>\n",
" <th>gripForce</th>\n",
" <th>sit and bend forward_cm</th>\n",
" <th>sit-ups counts</th>\n",
" <th>broad jump_cm</th>\n",
" <th>class</th>\n",
" <th>gender_F</th>\n",
" <th>gender_M</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>27.0</td>\n",
" <td>172.3</td>\n",
" <td>75.24</td>\n",
" <td>21.3</td>\n",
" <td>80.0</td>\n",
" <td>130.0</td>\n",
" <td>54.9</td>\n",
" <td>18.4</td>\n",
" <td>60.0</td>\n",
" <td>217.0</td>\n",
" <td>C</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25.0</td>\n",
" <td>165.0</td>\n",
" <td>55.80</td>\n",
" <td>15.7</td>\n",
" <td>77.0</td>\n",
" <td>126.0</td>\n",
" <td>36.4</td>\n",
" <td>16.3</td>\n",
" <td>53.0</td>\n",
" <td>229.0</td>\n",
" <td>A</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>31.0</td>\n",
" <td>179.6</td>\n",
" <td>78.00</td>\n",
" <td>20.1</td>\n",
" <td>92.0</td>\n",
" <td>152.0</td>\n",
" <td>44.8</td>\n",
" <td>12.0</td>\n",
" <td>49.0</td>\n",
" <td>181.0</td>\n",
" <td>C</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>32.0</td>\n",
" <td>174.5</td>\n",
" <td>71.10</td>\n",
" <td>18.4</td>\n",
" <td>76.0</td>\n",
" <td>147.0</td>\n",
" <td>41.4</td>\n",
" <td>15.2</td>\n",
" <td>53.0</td>\n",
" <td>219.0</td>\n",
" <td>B</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28.0</td>\n",
" <td>173.8</td>\n",
" <td>67.70</td>\n",
" <td>17.1</td>\n",
" <td>70.0</td>\n",
" <td>127.0</td>\n",
" <td>43.5</td>\n",
" <td>27.1</td>\n",
" <td>45.0</td>\n",
" <td>217.0</td>\n",
" <td>B</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age height_cm weight_kg body fat_% diastolic systolic gripForce \\\n",
"0 27.0 172.3 75.24 21.3 80.0 130.0 54.9 \n",
"1 25.0 165.0 55.80 15.7 77.0 126.0 36.4 \n",
"2 31.0 179.6 78.00 20.1 92.0 152.0 44.8 \n",
"3 32.0 174.5 71.10 18.4 76.0 147.0 41.4 \n",
"4 28.0 173.8 67.70 17.1 70.0 127.0 43.5 \n",
"\n",
" sit and bend forward_cm sit-ups counts broad jump_cm class gender_F \\\n",
"0 18.4 60.0 217.0 C 0 \n",
"1 16.3 53.0 229.0 A 0 \n",
"2 12.0 49.0 181.0 C 0 \n",
"3 15.2 53.0 219.0 B 0 \n",
"4 27.1 45.0 217.0 B 0 \n",
"\n",
" gender_M \n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check that the gender indicators are now 0/1 integers.\n",
"df.head(n=5)"
]
},
{
"cell_type": "markdown",
"id": "00518d17-a95d-491b-8926-cddc73360928",
"metadata": {},
"source": [
"Podział na zbiory uczące i testowe oraz skalowanie"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "efe47699-b999-4432-973a-e6141bb21a76",
"metadata": {},
"outputs": [],
"source": [
"# Separate the target label from the feature matrix.\n",
"target_column = 'class'\n",
"X = df.drop(columns=[target_column])\n",
"y = df[target_column]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c252d459-a639-4610-a742-901381c30e5f",
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "9e75d7f4-acc5-44d7-8798-64f9db2c5100",
"metadata": {},
"outputs": [],
"source": [
"# Learn the scaling statistics from the training split only, then apply the\n",
"# same transformation to both splits.\n",
"scaler = StandardScaler().fit(X_train)\n",
"X_train = scaler.transform(X_train)\n",
"X_test = scaler.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "81a2c2d5-9c03-45d4-b12d-f843f52a908a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10714"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows in the training split.\n",
"len(X_train)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "8413b009-25c2-4c18-94ce-96f66262b43b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2679"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows in the test split.\n",
"len(X_test)"
]
},
{
"cell_type": "markdown",
"id": "6f281ecc-292a-4c4b-a4ac-e73055e48323",
"metadata": {},
"source": [
"Regresja logistyczna"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "c10b7f26-5759-4045-b613-033c0f5ccc2f",
"metadata": {},
"outputs": [],
"source": [
"# Logistic regression with every hyperparameter left at the library default.\n",
"logistic_model = LogisticRegression()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "3a64150f-f098-4c1e-a089-81bfab9af398",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div>"
],
"text/plain": [
"LogisticRegression()"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit on the scaled training split; the fitted estimator is the cell output.\n",
"logistic_model.fit(X=X_train, y=y_train)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "c8768890-e826-41d1-a923-6da63f16db6d",
"metadata": {},
"outputs": [],
"source": [
"# Predicted class for every row of the held-out test split.\n",
"logistic_predicts = logistic_model.predict(X=X_test)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "28456695-9a70-4b63-b92b-1313ee817307",
"metadata": {},
"outputs": [],
"source": [
"# Overall accuracy plus the per-class precision / recall / F1 table.\n",
"accuracy = accuracy_score(y_true=y_test, y_pred=logistic_predicts)\n",
"cr = classification_report(y_true=y_test, y_pred=logistic_predicts)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "985d5347-e9fa-4921-b70b-2d72faceb31d",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" A 0.70 0.71 0.71 685\n",
" B 0.45 0.44 0.45 662\n",
" C 0.52 0.53 0.53 650\n",
" D 0.79 0.78 0.78 682\n",
"\n",
" accuracy 0.62 2679\n",
" macro avg 0.62 0.62 0.62 2679\n",
"weighted avg 0.62 0.62 0.62 2679\n",
"\n",
"accuracy: 0.6188876446435237\n"
]
}
],
"source": [
"# Per-class metrics table first, then the overall accuracy.\n",
"print(cr)\n",
"print('accuracy: ', accuracy, sep=' ')"
]
},
{
"cell_type": "markdown",
"id": "e5705678-5aa0-47b2-ba0d-e8892abb6b0a",
"metadata": {},
"source": [
"Naiwny klasyfikator Bayesa"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "58be3b38-3a2f-45b8-a24c-89fedf302a2e",
"metadata": {},
"outputs": [],
"source": [
"# Gaussian naive Bayes fitted on the scaled training split.\n",
"bayes_model = (\n",
"    GaussianNB()\n",
"    .fit(X_train, y_train)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "21138174-4740-4ab9-91ed-5b5514e1d6a7",
"metadata": {},
"outputs": [],
"source": [
"# Predicted class for every row of the held-out test split.\n",
"bayes_predicts = bayes_model.predict(X=X_test)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "9e60479c-8985-424f-aae3-5af4e9da67c7",
"metadata": {},
"outputs": [],
"source": [
"# Overall accuracy plus the per-class precision / recall / F1 table.\n",
"accuracy = accuracy_score(y_true=y_test, y_pred=bayes_predicts)\n",
"report = classification_report(y_true=y_test, y_pred=bayes_predicts)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "643ab5d3-175f-4f2e-9da4-9ebfb0104a36",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" A 0.59 0.74 0.66 685\n",
" B 0.41 0.30 0.34 662\n",
" C 0.46 0.46 0.46 650\n",
" D 0.68 0.69 0.68 682\n",
"\n",
" accuracy 0.55 2679\n",
" macro avg 0.53 0.55 0.54 2679\n",
"weighted avg 0.54 0.55 0.54 2679\n",
"\n",
"accuracy: 0.5487122060470325\n"
]
}
],
"source": [
"# Per-class metrics table first, then the overall accuracy.\n",
"print(report)\n",
"print('accuracy: ', accuracy, sep=' ')"
]
},
{
"cell_type": "markdown",
"id": "23b19e79-ada4-4a65-a35d-68c492585d8c",
"metadata": {},
"source": [
"Klasyfikator najbliższych sąsiadów"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "3f482536-8898-4eea-8c85-15a73039a913",
"metadata": {},
"outputs": [],
"source": [
"# k-nearest-neighbours classifier that votes over the 8 closest training rows.\n",
"KNN = (\n",
"    KNeighborsClassifier(n_neighbors=8)\n",
"    .fit(X_train, y_train)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "6bfaf6c4-8e63-4839-9c71-76befae6d82f",
"metadata": {},
"outputs": [],
"source": [
"# Predicted class for every row of the held-out test split.\n",
"knn_predicts = KNN.predict(X=X_test)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "4e150a72-110a-4e45-a99f-10f7d93af94e",
"metadata": {},
"outputs": [],
"source": [
"# Overall accuracy plus the per-class precision / recall / F1 table.\n",
"accuracy = accuracy_score(y_true=y_test, y_pred=knn_predicts)\n",
"report = classification_report(y_true=y_test, y_pred=knn_predicts)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "b6a28ccf-a0f5-427c-89e8-8b0bef12519d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" A 0.62 0.82 0.71 685\n",
" B 0.43 0.47 0.45 662\n",
" C 0.56 0.51 0.54 650\n",
" D 0.91 0.64 0.75 682\n",
"\n",
" accuracy 0.61 2679\n",
" macro avg 0.63 0.61 0.61 2679\n",
"weighted avg 0.64 0.61 0.61 2679\n",
"\n",
"accuracy: 0.6114221724524076\n"
]
}
],
"source": [
"# Per-class metrics table first, then the overall accuracy.\n",
"print(report)\n",
"print('accuracy: ', accuracy, sep=' ')"
]
},
{
"cell_type": "markdown",
"id": "972dccde-74fe-4353-accd-847eb210c58f",
"metadata": {},
"source": [
"Drzewo decyzyjne"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "e132911c-7b1d-40c1-9ab6-6383b679fd47",
"metadata": {},
"outputs": [],
"source": [
"# Encode the class letters as integer codes.\n",
"# The encoder is fitted on `y`, the label column that is never overwritten, so\n",
"# `label_encoder.classes_` keeps the original letters even if this cell is re-run.\n",
"label_encoder = LabelEncoder().fit(y)\n",
"# y_train / y_test are replaced in place below; skip the transform when they are\n",
"# already integer-coded so that a second execution of this cell is a no-op.\n",
"if not np.issubdtype(np.asarray(y_train).dtype, np.integer):\n",
"    y_train = label_encoder.transform(y_train)\n",
"    y_test = label_encoder.transform(y_test)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "ff196743-c2c5-4947-bbb8-4b6bad6e9afc",
"metadata": {},
"outputs": [],
"source": [
"# Decision tree limited to depth 9. random_state is fixed because scikit-learn\n",
"# permutes the candidate features at every split, so equally good splits could\n",
"# otherwise be resolved differently from one run to the next.\n",
"tree_model = DecisionTreeClassifier(max_depth=9, random_state=42).fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "d2b9f937-94b0-4a07-b9e4-77607649dee8",
"metadata": {},
"outputs": [],
"source": [
"# Predicted (integer-coded) class for every row of the held-out test split.\n",
"tree_predicts = tree_model.predict(X=X_test)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "0f47c596-6ff5-4808-abab-870b7746e2c2",
"metadata": {},
"outputs": [],
"source": [
"# Overall accuracy plus the per-class precision / recall / F1 table.\n",
"accuracy = accuracy_score(y_true=y_test, y_pred=tree_predicts)\n",
"report = classification_report(y_true=y_test, y_pred=tree_predicts)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "d89eae25-77d9-4f60-b954-743dcbc7535b",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.69 0.82 0.75 685\n",
" 1 0.54 0.57 0.56 662\n",
" 2 0.66 0.59 0.62 650\n",
" 3 0.88 0.76 0.81 682\n",
"\n",
" accuracy 0.69 2679\n",
" macro avg 0.69 0.69 0.69 2679\n",
"weighted avg 0.70 0.69 0.69 2679\n",
"\n",
"accuracy: 0.6875699888017918\n"
]
}
],
"source": [
"# Per-class metrics table first, then the overall accuracy.\n",
"print(report)\n",
"print('accuracy: ', accuracy, sep=' ')"
]
},
{
"cell_type": "markdown",
"id": "6972b651-25a0-4a4d-9d26-ae8b0a3e42f8",
"metadata": {},
"source": [
"Sieć neuronowa"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "80945e15-a154-49c2-917c-3c830616b02a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" dense (Dense) (None, 128) 1664 \n",
" \n",
" dense_1 (Dense) (None, 128) 16512 \n",
" \n",
" dense_2 (Dense) (None, 128) 16512 \n",
" \n",
" dense_3 (Dense) (None, 4) 516 \n",
" \n",
"=================================================================\n",
"Total params: 35,204\n",
"Trainable params: 35,204\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# Fully connected classifier: three hidden ReLU layers and a softmax output.\n",
"# The input width and the number of output units are derived from the data\n",
"# instead of being hard-coded, so the model follows changes in preprocessing.\n",
"n_features = X_train.shape[1]\n",
"n_classes = len(label_encoder.classes_)\n",
"\n",
"model = keras.Sequential(\n",
"    [\n",
"        keras.Input(shape=(n_features,)),\n",
"        layers.Dense(128, activation=\"relu\"),\n",
"        layers.Dense(128, activation=\"relu\"),\n",
"        layers.Dense(128, activation=\"relu\"),\n",
"        layers.Dense(n_classes, activation=\"softmax\"),\n",
"    ]\n",
")\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "b9263592-8fec-48bf-b232-0e1a90903cbd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/50\n",
"76/76 [==============================] - 1s 7ms/step - loss: 0.9884 - accuracy: 0.5528 - val_loss: 0.8428 - val_accuracy: 0.6287\n",
"Epoch 2/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.8034 - accuracy: 0.6459 - val_loss: 0.7855 - val_accuracy: 0.6670\n",
"Epoch 3/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.7376 - accuracy: 0.6876 - val_loss: 0.7341 - val_accuracy: 0.6931\n",
"Epoch 4/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.6971 - accuracy: 0.7119 - val_loss: 0.7143 - val_accuracy: 0.7146\n",
"Epoch 5/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.6715 - accuracy: 0.7206 - val_loss: 0.6984 - val_accuracy: 0.7201\n",
"Epoch 6/50\n",
"76/76 [==============================] - 0s 2ms/step - loss: 0.6511 - accuracy: 0.7301 - val_loss: 0.6901 - val_accuracy: 0.7164\n",
"Epoch 7/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.6375 - accuracy: 0.7359 - val_loss: 0.6782 - val_accuracy: 0.7285\n",
"Epoch 8/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.6239 - accuracy: 0.7410 - val_loss: 0.7001 - val_accuracy: 0.7062\n",
"Epoch 9/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.6144 - accuracy: 0.7483 - val_loss: 0.6578 - val_accuracy: 0.7304\n",
"Epoch 10/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.6059 - accuracy: 0.7492 - val_loss: 0.6606 - val_accuracy: 0.7183\n",
"Epoch 11/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.6000 - accuracy: 0.7541 - val_loss: 0.6597 - val_accuracy: 0.7341\n",
"Epoch 12/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5942 - accuracy: 0.7582 - val_loss: 0.6617 - val_accuracy: 0.7183\n",
"Epoch 13/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5856 - accuracy: 0.7586 - val_loss: 0.6732 - val_accuracy: 0.7285\n",
"Epoch 14/50\n",
"76/76 [==============================] - 0s 5ms/step - loss: 0.5807 - accuracy: 0.7627 - val_loss: 0.6709 - val_accuracy: 0.7369\n",
"Epoch 15/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5731 - accuracy: 0.7659 - val_loss: 0.6618 - val_accuracy: 0.7416\n",
"Epoch 16/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5665 - accuracy: 0.7694 - val_loss: 0.6483 - val_accuracy: 0.7463\n",
"Epoch 17/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5620 - accuracy: 0.7694 - val_loss: 0.6635 - val_accuracy: 0.7220\n",
"Epoch 18/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5555 - accuracy: 0.7709 - val_loss: 0.6493 - val_accuracy: 0.7425\n",
"Epoch 19/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5523 - accuracy: 0.7715 - val_loss: 0.6649 - val_accuracy: 0.7313\n",
"Epoch 20/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5489 - accuracy: 0.7771 - val_loss: 0.6862 - val_accuracy: 0.7164\n",
"Epoch 21/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5409 - accuracy: 0.7801 - val_loss: 0.6567 - val_accuracy: 0.7435\n",
"Epoch 22/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5373 - accuracy: 0.7784 - val_loss: 0.6638 - val_accuracy: 0.7285\n",
"Epoch 23/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5316 - accuracy: 0.7838 - val_loss: 0.6582 - val_accuracy: 0.7407\n",
"Epoch 24/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5308 - accuracy: 0.7857 - val_loss: 0.6762 - val_accuracy: 0.7285\n",
"Epoch 25/50\n",
"76/76 [==============================] - 0s 2ms/step - loss: 0.5279 - accuracy: 0.7844 - val_loss: 0.6775 - val_accuracy: 0.7229\n",
"Epoch 26/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5165 - accuracy: 0.7884 - val_loss: 0.6599 - val_accuracy: 0.7257\n",
"Epoch 27/50\n",
"76/76 [==============================] - 0s 2ms/step - loss: 0.5186 - accuracy: 0.7899 - val_loss: 0.6610 - val_accuracy: 0.7369\n",
"Epoch 28/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5117 - accuracy: 0.7903 - val_loss: 0.6590 - val_accuracy: 0.7388\n",
"Epoch 29/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5162 - accuracy: 0.7883 - val_loss: 0.6800 - val_accuracy: 0.7211\n",
"Epoch 30/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.5048 - accuracy: 0.7945 - val_loss: 0.6595 - val_accuracy: 0.7379\n",
"Epoch 31/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4991 - accuracy: 0.7955 - val_loss: 0.6911 - val_accuracy: 0.7146\n",
"Epoch 32/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4927 - accuracy: 0.8012 - val_loss: 0.6630 - val_accuracy: 0.7332\n",
"Epoch 33/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4943 - accuracy: 0.8029 - val_loss: 0.6822 - val_accuracy: 0.7285\n",
"Epoch 34/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4825 - accuracy: 0.8061 - val_loss: 0.6837 - val_accuracy: 0.7285\n",
"Epoch 35/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4813 - accuracy: 0.8067 - val_loss: 0.6821 - val_accuracy: 0.7267\n",
"Epoch 36/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4830 - accuracy: 0.8034 - val_loss: 0.6730 - val_accuracy: 0.7379\n",
"Epoch 37/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4725 - accuracy: 0.8099 - val_loss: 0.7114 - val_accuracy: 0.7295\n",
"Epoch 38/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4729 - accuracy: 0.8085 - val_loss: 0.7065 - val_accuracy: 0.7183\n",
"Epoch 39/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4679 - accuracy: 0.8136 - val_loss: 0.6859 - val_accuracy: 0.7248\n",
"Epoch 40/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4683 - accuracy: 0.8111 - val_loss: 0.7185 - val_accuracy: 0.7108\n",
"Epoch 41/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4571 - accuracy: 0.8174 - val_loss: 0.7018 - val_accuracy: 0.7276\n",
"Epoch 42/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4459 - accuracy: 0.8218 - val_loss: 0.7098 - val_accuracy: 0.7127\n",
"Epoch 43/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4492 - accuracy: 0.8227 - val_loss: 0.7279 - val_accuracy: 0.7248\n",
"Epoch 44/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4493 - accuracy: 0.8253 - val_loss: 0.7087 - val_accuracy: 0.7211\n",
"Epoch 45/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4405 - accuracy: 0.8242 - val_loss: 0.7006 - val_accuracy: 0.7155\n",
"Epoch 46/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4327 - accuracy: 0.8279 - val_loss: 0.7278 - val_accuracy: 0.7155\n",
"Epoch 47/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4324 - accuracy: 0.8272 - val_loss: 0.7120 - val_accuracy: 0.7267\n",
"Epoch 48/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4275 - accuracy: 0.8313 - val_loss: 0.7302 - val_accuracy: 0.7090\n",
"Epoch 49/50\n",
"76/76 [==============================] - 0s 4ms/step - loss: 0.4230 - accuracy: 0.8335 - val_loss: 0.7484 - val_accuracy: 0.7183\n",
"Epoch 50/50\n",
"76/76 [==============================] - 0s 3ms/step - loss: 0.4182 - accuracy: 0.8354 - val_loss: 0.7354 - val_accuracy: 0.7164\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x260d1488370>"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# In the recorded run val_loss was lowest around epoch 28 and then rose while the\n",
"# training loss kept falling (overfitting), so stop once val_loss stalls and\n",
"# roll the model back to the weights of its best validation epoch.\n",
"early_stopping = keras.callbacks.EarlyStopping(\n",
"    monitor=\"val_loss\", patience=5, restore_best_weights=True\n",
")\n",
"\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n",
"# Keep the History object in a variable instead of echoing its repr as cell output.\n",
"history = model.fit(\n",
"    X_train,\n",
"    y_train,\n",
"    batch_size=128,\n",
"    epochs=50,  # upper bound; early stopping may end training sooner\n",
"    validation_split=0.1,\n",
"    callbacks=[early_stopping],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "28fb38de-bff7-4a37-8184-f82078378427",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"84/84 [==============================] - 0s 1ms/step\n"
]
}
],
"source": [
"# Run the trained network on X_test; the output is turned into class labels\n",
"# via argmax in the following cell.\n",
"neural_predicts = model.predict(x=X_test)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "2517e17d-522e-455a-a9bd-3872b44a256a",
"metadata": {},
"outputs": [],
"source": [
"# For every row, take the index of the largest predicted score as the label.\n",
"neural_predicts_labels = neural_predicts.argmax(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "d2030824-2a46-494e-973c-7913536cb651",
"metadata": {},
"outputs": [],
"source": [
"# Score the network's predicted labels against y_test.\n",
"accuracy = accuracy_score(y_true=y_test, y_pred=neural_predicts_labels)\n",
"report = classification_report(y_true=y_test, y_pred=neural_predicts_labels)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "43a2b294-ea59-4dbb-9515-1496e3ade233",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.75 0.84 0.79 685\n",
" 1 0.60 0.65 0.62 662\n",
" 2 0.72 0.62 0.67 650\n",
" 3 0.89 0.84 0.86 682\n",
"\n",
" accuracy 0.74 2679\n",
" macro avg 0.74 0.74 0.74 2679\n",
"weighted avg 0.74 0.74 0.74 2679\n",
"\n",
"accuracy: 0.7375886524822695\n"
]
}
],
"source": [
"# Per-class metrics table first, then the overall accuracy.\n",
"print(report)\n",
"print(\"accuracy: \", accuracy, sep=\" \")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba943951-ad7c-4a1a-b08f-a9a1613d8379",
"metadata": {},
"outputs": [],
"source": [
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}