fantastyczne_gole/notebooks/xgboost_dla_xG.ipynb

1221 lines
188 KiB
Plaintext
Raw Normal View History

2023-12-12 15:22:01 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Importy"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import xgboost as xgb\n",
"from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score\n",
"from sklearn.metrics import precision_score, recall_score, accuracy_score\n",
"from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV, cross_val_score\n",
"from xgboost import XGBClassifier"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Wczytanie danych"
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('data4.csv')"
]
},
{
"cell_type": "code",
"execution_count": 166,
"metadata": {},
"outputs": [],
"source": [
"# Target: one-column frame holding `isGoal`; features: every remaining column of `df`.\n",
"y = df[['isGoal']]\n",
"X = df.drop(columns=['isGoal'])"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>match_minute</th>\n",
" <th>match_second</th>\n",
" <th>position_x</th>\n",
" <th>position_y</th>\n",
" <th>play_type</th>\n",
" <th>BodyPart</th>\n",
" <th>Number_Intervening_Opponents</th>\n",
" <th>Number_Intervening_Teammates</th>\n",
" <th>Interference_on_Shooter</th>\n",
" <th>outcome</th>\n",
" <th>...</th>\n",
" <th>Interference_on_Shooter_Code</th>\n",
" <th>distance_to_goalM</th>\n",
" <th>distance_to_centerM</th>\n",
" <th>angle</th>\n",
" <th>isFoot</th>\n",
" <th>isHead</th>\n",
" <th>header_distance_to_goalM</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Medium</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29</td>\n",
" <td>54</td>\n",
" <td>23.69</td>\n",
" <td>4.99</td>\n",
" <td>Open Play</td>\n",
" <td>Left</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>Medium</td>\n",
" <td>Missed</td>\n",
" <td>...</td>\n",
" <td>2</td>\n",
" <td>24.212265</td>\n",
" <td>5.001769</td>\n",
" <td>11.922004</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>33</td>\n",
" <td>28.93</td>\n",
" <td>-11.22</td>\n",
" <td>Open Play</td>\n",
" <td>Left</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>Low</td>\n",
" <td>Missed</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>31.039134</td>\n",
" <td>11.246462</td>\n",
" <td>21.243463</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>61</td>\n",
" <td>25</td>\n",
" <td>9.98</td>\n",
" <td>-5.24</td>\n",
" <td>Open Play</td>\n",
" <td>Head</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>High</td>\n",
" <td>Missed</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>11.277751</td>\n",
" <td>5.252358</td>\n",
" <td>27.757313</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>11.277751</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>73</td>\n",
" <td>45</td>\n",
" <td>4.49</td>\n",
" <td>-5.74</td>\n",
" <td>Open Play</td>\n",
" <td>Right</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>Low</td>\n",
" <td>Missed</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>7.298171</td>\n",
" <td>5.753538</td>\n",
" <td>52.031899</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>44</td>\n",
" <td>40</td>\n",
" <td>7.98</td>\n",
" <td>-12.97</td>\n",
" <td>Open Play</td>\n",
" <td>Right</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Medium</td>\n",
" <td>Saved</td>\n",
" <td>...</td>\n",
" <td>2</td>\n",
" <td>15.254368</td>\n",
" <td>13.000590</td>\n",
" <td>58.457635</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" match_minute match_second position_x position_y play_type BodyPart \\\n",
"0 29 54 23.69 4.99 Open Play Left \n",
"1 11 33 28.93 -11.22 Open Play Left \n",
"2 61 25 9.98 -5.24 Open Play Head \n",
"3 73 45 4.49 -5.74 Open Play Right \n",
"4 44 40 7.98 -12.97 Open Play Right \n",
"\n",
" Number_Intervening_Opponents Number_Intervening_Teammates \\\n",
"0 4 2 \n",
"1 4 1 \n",
"2 3 1 \n",
"3 2 0 \n",
"4 1 0 \n",
"\n",
" Interference_on_Shooter outcome ... Interference_on_Shooter_Code \\\n",
"0 Medium Missed ... 2 \n",
"1 Low Missed ... 1 \n",
"2 High Missed ... 3 \n",
"3 Low Missed ... 1 \n",
"4 Medium Saved ... 2 \n",
"\n",
" distance_to_goalM distance_to_centerM angle isFoot isHead \\\n",
"0 24.212265 5.001769 11.922004 1 0 \n",
"1 31.039134 11.246462 21.243463 1 0 \n",
"2 11.277751 5.252358 27.757313 0 1 \n",
"3 7.298171 5.753538 52.031899 1 0 \n",
"4 15.254368 13.000590 58.457635 1 0 \n",
"\n",
" header_distance_to_goalM High Low Medium \n",
"0 0.000000 0 0 1 \n",
"1 0.000000 0 1 0 \n",
"2 11.277751 1 0 0 \n",
"3 0.000000 0 1 0 \n",
"4 0.000000 0 0 1 \n",
"\n",
"[5 rows x 29 columns]"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>isGoal</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" isGoal\n",
"0 0\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0"
]
},
"execution_count": 168,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Przygotowanie danych"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Uwzględnienie wybranych cech: \n",
"- Współrzędna x strzelającego,\n",
"- Współrzędna y strzelającego,\n",
"- Dystans do bramki,\n",
"- Kąt do bramki,\n",
"- Minuta meczu,\n",
"- Liczba przeciwników przed piłką,\n",
"- Liczba zawodników ze swojej drużyny przed piłką,\n",
"- Część ciała."
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['match_minute', 'match_second', 'position_x', 'position_y', 'play_type',\n",
" 'BodyPart', 'Number_Intervening_Opponents',\n",
" 'Number_Intervening_Teammates', 'Interference_on_Shooter', 'outcome',\n",
" 'position_xM', 'position_yM', 'position_xM_r', 'position_yM_r',\n",
" 'position_xM_std', 'position_yM_std', 'position_xM_std_r',\n",
" 'position_yM_std_r', 'BodyPartCode', 'Interference_on_Shooter_Code',\n",
" 'distance_to_goalM', 'distance_to_centerM', 'angle', 'isFoot', 'isHead',\n",
" 'header_distance_to_goalM', 'High', 'Low', 'Medium'],\n",
" dtype='object')"
]
},
"execution_count": 169,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.columns"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the modelling features. `.copy()` detaches the selection from `X`, so later\n",
"# column assignments on `X_extracted` do not raise SettingWithCopyWarning.\n",
"feature_columns = [\n",
"    'position_x',\n",
"    'position_y',\n",
"    'distance_to_goalM',\n",
"    'angle',\n",
"    'match_minute',\n",
"    'Number_Intervening_Opponents',\n",
"    'Number_Intervening_Teammates',\n",
"    'isFoot',\n",
"    'isHead',\n",
"]\n",
"X_extracted = X[feature_columns].copy()"
]
},
{
"cell_type": "code",
"execution_count": 171,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\s478991\\AppData\\Local\\temp\\ipykernel_3956\\2392787789.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X_extracted['isFoot'] = X_extracted['isFoot'].astype('category')\n",
"C:\\Users\\s478991\\AppData\\Local\\temp\\ipykernel_3956\\2392787789.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X_extracted['isHead'] = X_extracted['isHead'].astype('category')\n"
]
}
],
"source": [
"# Mark the binary body-part flags as categorical (the model is built with enable_categorical=True).\n",
"# `astype` with a mapping returns a new frame instead of assigning columns on a slice of `X`,\n",
"# which avoids SettingWithCopyWarning and keeps the cell idempotent on re-run.\n",
"X_extracted = X_extracted.astype({'isFoot': 'category', 'isHead': 'category'})"
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>position_x</th>\n",
" <th>position_y</th>\n",
" <th>distance_to_goalM</th>\n",
" <th>angle</th>\n",
" <th>match_minute</th>\n",
" <th>Number_Intervening_Opponents</th>\n",
" <th>Number_Intervening_Teammates</th>\n",
" <th>isFoot</th>\n",
" <th>isHead</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>23.69</td>\n",
" <td>4.99</td>\n",
" <td>24.212265</td>\n",
" <td>11.922004</td>\n",
" <td>29</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>28.93</td>\n",
" <td>-11.22</td>\n",
" <td>31.039134</td>\n",
" <td>21.243463</td>\n",
" <td>11</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9.98</td>\n",
" <td>-5.24</td>\n",
" <td>11.277751</td>\n",
" <td>27.757313</td>\n",
" <td>61</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.49</td>\n",
" <td>-5.74</td>\n",
" <td>7.298171</td>\n",
" <td>52.031899</td>\n",
" <td>73</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.98</td>\n",
" <td>-12.97</td>\n",
" <td>15.254368</td>\n",
" <td>58.457635</td>\n",
" <td>44</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" position_x position_y distance_to_goalM angle match_minute \\\n",
"0 23.69 4.99 24.212265 11.922004 29 \n",
"1 28.93 -11.22 31.039134 21.243463 11 \n",
"2 9.98 -5.24 11.277751 27.757313 61 \n",
"3 4.49 -5.74 7.298171 52.031899 73 \n",
"4 7.98 -12.97 15.254368 58.457635 44 \n",
"\n",
" Number_Intervening_Opponents Number_Intervening_Teammates isFoot isHead \n",
"0 4 2 1 0 \n",
"1 4 1 1 0 \n",
"2 3 1 0 1 \n",
"3 2 0 1 0 \n",
"4 1 0 1 0 "
]
},
"execution_count": 172,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_extracted.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Podział danych na zbiór treningowy oraz zbiór testowy"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X_extracted, y, test_size=0.2, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {},
"outputs": [],
"source": [
"# Two identically configured stratified 5-fold splitters: the outer one scores the tuned\n",
"# model, the inner one is handed to the grid search for hyper-parameter selection.\n",
"cv_settings = dict(n_splits=5, shuffle=True, random_state=1)\n",
"cv_outer = StratifiedKFold(**cv_settings)\n",
"cv_inner = StratifiedKFold(**cv_settings)"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Oddane strzały w zbiorze danych: 7226\n",
"Gole trafione w zbiorze danych: 906\n"
]
}
],
"source": [
"# Count training samples per class, looked up by label rather than by position:\n",
"# value_counts() orders by frequency, so unpacking it silently assumes class 0 is the majority.\n",
"class_counts = y_train['isGoal'].value_counts()\n",
"count_class_0 = class_counts.loc[0]\n",
"count_class_1 = class_counts.loc[1]\n",
"\n",
"# The labels name what is actually counted: non-goals and goals in the training split.\n",
"print('Strzały niezakończone golem w zbiorze treningowym: ', count_class_0)\n",
"print('Gole w zbiorze treningowym: ', count_class_1)"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7.975717439293598"
]
},
"execution_count": 176,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class imbalance in training data\n",
"\n",
"scale_pos_weight = count_class_0 / count_class_1\n",
"scale_pos_weight"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Trening modelu"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [],
"source": [
"from xgboost import XGBClassifier"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {},
"outputs": [],
"source": [
"# Base estimator handed to the grid search: logistic objective, histogram tree method,\n",
"# categorical support switched on for the `category`-typed columns.\n",
"xgb_model = XGBClassifier(\n",
"    objective='binary:logistic',\n",
"    tree_method='hist',\n",
"    enable_categorical=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {},
"outputs": [],
"source": [
"# Defining the hyper-parameter grid for XG Boost\n",
"param_grid_xgb = {'learning_rate': [0.01, 0.001, 0.0001],\n",
" 'max_depth': [3, 5, 7, 8, 9],\n",
" 'n_estimators': [100, 150, 200, 250, 300],\n",
" 'scale_pos_weight': [1, scale_pos_weight]}"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [],
"source": [
"start_time = time.time()"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {},
"outputs": [],
"source": [
"# Nested cross-validation: the grid search (cv_inner) selects hyper-parameters on each outer\n",
"# training fold, and cross_val_score (cv_outer) scores the tuned model on the held-out fold,\n",
"# so the F1 estimate is not computed on data used for tuning.\n",
"grid_xg = GridSearchCV(xgb_model, param_grid=param_grid_xgb, cv=cv_inner, scoring='f1', n_jobs=-1)\n",
"scores_xg = cross_val_score(grid_xg, X_train, y_train, cv=cv_outer, scoring='f1', n_jobs=-1)\n",
"\n",
"# Report the estimate so the expensive outer loop is not wasted.\n",
"print(f'Nested CV F1: {scores_xg.mean():.3f} +/- {scores_xg.std():.3f} (per fold: {scores_xg.round(3)})')"
]
},
{
"cell_type": "code",
"execution_count": 182,
"metadata": {},
"outputs": [],
"source": [
"# Run the grid search on the whole training split and keep the winning estimator\n",
"# (GridSearchCV refits the best configuration on all of X_train by default).\n",
"best_xgb_model = grid_xg.fit(X_train, y_train).best_estimator_"
]
},
{
"cell_type": "code",
"execution_count": 183,
"metadata": {},
"outputs": [],
"source": [
"# Stopping the timer\n",
"stop_time = time.time()\n",
"\n",
"# Training Time\n",
"xgb_training_time = stop_time - start_time"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameters: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 100, 'scale_pos_weight': 7.975717439293598}\n",
"Model Training Time: 677.443 seconds\n"
]
}
],
"source": [
"# Print the best parameters and training time\n",
"print(\"Best parameters: \", grid_xg.best_params_)\n",
"print (f\"Model Training Time: {xgb_training_time:.3f} seconds\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Ewaluacja modelu"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dane treningowe"
]
},
{
"cell_type": "code",
"execution_count": 185,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Confusion Matrix - Train Set')"
]
},
"execution_count": 185,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiQAAAHHCAYAAACPy0PBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABJHklEQVR4nO3deVwV9f7H8fcB5YisorKVC2Yu5JZYipSmkmho7mVZ4lY3Q80141bmUlK2mGZqtmGLZVaaSy6kqZm4ZKHmlqZFpeCKuALC/P7w57meUA/YGQ/h6/l4zONxmfme73xnlO7bz/c7cyyGYRgCAABwITdXDwAAAIBAAgAAXI5AAgAAXI5AAgAAXI5AAgAAXI5AAgAAXI5AAgAAXI5AAgAAXI5AAgAAXI5Agn+N3bt3q3Xr1vLz85PFYtG8efOc2v9vv/0mi8WipKQkp/b7b3bXXXfprrvucvUwTMGfN1C8EEhQJL/++qv+85//qFq1aipTpox8fX0VFRWlSZMm6cyZM6aeOy4uTlu3btULL7ygDz/8UI0aNTL1fNdSr169ZLFY5Ovre8n7uHv3blksFlksFr3yyitF7n///v0aPXq0UlNTnTBac40ePdp2rVfaimNQys/P1wcffKDGjRsrICBAPj4+qlGjhnr27Kl169YVub/Tp09r9OjRWrlypfMHCxQzpVw9APx7LFq0SN26dZPValXPnj1Vp04d5eTkaM2aNRoxYoS2bdumGTNmmHLuM2fOKCUlRU8//bQGDBhgyjmqVKmiM2fOqHTp0qb070ipUqV0+vRpLViwQPfdd5/dsY8//lhlypTR2bNnr6rv/fv3a8yYMapataoaNGhQ6M8tW7bsqs73T3Tu3FnVq1e3/Xzy5En1799fnTp1UufOnW37g4KC/tF5zPjzHjRokN5880116NBBPXr0UKlSpbRr1y4tXrxY1apVU5MmTYrU3+nTpzVmzBhJKpYBDHAmAgkKZd++ferevbuqVKmiFStWKCQkxHYsPj5ee/bs0aJFi0w7/6FDhyRJ/v7+pp3DYrGoTJkypvXviNVqVVRUlD755JMCgWTWrFmKjY3VF198cU3Gcvr0aZUtW1YeHh7X5HwXq1evnurVq2f7+fDhw+rfv7/q1aunhx566LKfO3v2rDw8POTmVrjCr7P/vDMyMjR16lQ98sgjBYL566+/bvs7DODSmLJBoUyYMEEnT57Uu+++axdGLqhevbqeeOIJ28/nzp3TuHHjdNNNN8lqtapq1ar673//q+zsbLvPVa1aVe3atdOaNWt0++23q0yZMqpWrZo++OADW5vRo0erSpUqkqQRI0bIYrGoatWqks5PdVz43xe7UPa/WHJysu644w75+/vL29tbNWvW1H//+1/b8cutKVixYoXuvPNOeXl5yd/fXx06dNCOHTsueb49e/aoV69e8vf3l5+fn3r37q3Tp09f/sb+zYMPPqjFixcrMzPTtm/jxo3avXu3HnzwwQLtjx49quHDh6tu3bry9vaWr6+v2rZtq82bN9varFy5UrfddpskqXfv3rYpjwvXedddd6lOnTratGmTmjVrprJly9ruy9/XkMTFxalMmTIFrj8mJkblypXT/v37C32t/8TKlStlsVj06aef6plnntENN9ygsmXLKisrq1D3RLr0n3evXr3k7e2tv/76Sx07dpS3t7cqVqyo4cOHKy8v74pj2rdvnwzDUFRUVIFjFotFgYGBdvsyMzM1ePBgVapUSVarVdWrV9dLL72k/Px82/gqVqwoSRozZoztz2306NFXcceA4o8KCQplwYIFqlatmpo2bVqo9v369dPMmTPVtWtXDRs2TOvXr1diYqJ27NihuXPn2rXds2ePunbtqr59+youLk7vvfeeevXqpYiICN1yyy3q3Lmz/P39NWTIED3wwAO655575O3tXaTxb9u2Te3atVO9evU0duxYWa1W7dmzR99///0VP/fNN9+obdu2ql
atmkaPHq0zZ87ojTfeUFRUlH788ccCYei+++5TWFiYEhMT9eOPP+qdd95RYGCgXnrppUKNs3Pnznrsscf05Zdfqk+fPpLOV0dq1aqlhg0bFmi/d+9ezZs3T926dVNYWJgyMjL01ltvqXnz5tq+fbtCQ0NVu3ZtjR07VqNGjdKjjz6qO++8U5Ls/iyPHDmitm3bqnv37nrooYcuOx0yadIkrVixQnFxcUpJSZG7u7veeustLVu2TB9++KFCQ0MLdZ3OMm7cOHl4eGj48OHKzs6Wh4eHtm/f7vCeXEleXp5iYmLUuHFjvfLKK/rmm2/06quv6qabblL//v0v+7kLoXnOnDnq1q2bypYte9m2p0+fVvPmzfXXX3/pP//5jypXrqy1a9cqISFBBw4c0Ouvv66KFStq2rRpBaarLq4eASWKAThw/PhxQ5LRoUOHQrVPTU01JBn9+vWz2z98+HBDkrFixQrbvipVqhiSjNWrV9v2HTx40LBarcawYcNs+/bt22dIMl5++WW7PuPi4owqVaoUGMNzzz1nXPzXe+LEiYYk49ChQ5cd94VzvP/++7Z9DRo0MAIDA40jR47Y9m3evNlwc3MzevbsWeB8ffr0seuzU6dORvny5S97zouvw8vLyzAMw+jatavRqlUrwzAMIy8vzwgODjbGjBlzyXtw9uxZIy8vr8B1WK1WY+zYsbZ9GzduLHBtFzRv3tyQZEyfPv2Sx5o3b263b+nSpYYk4/nnnzf27t1reHt7Gx07dnR4jVfr0KFDhiTjueees+379ttvDUlGtWrVjNOnT9u1L+w9udSfd1xcnCHJrp1hGMatt95qREREOBxrz549DUlGuXLljE6dOhmvvPKKsWPHjgLtxo0bZ3h5eRm//PKL3f6nnnrKcHd3N9LS0i577UBJxZQNHMrKypIk+fj4FKr9119/LUkaOnSo3f5hw4ZJUoG1JuHh4bZ/tUtSxYoVVbNmTe3du/eqx/x3F9aefPXVV7aSuCMHDhxQamqqevXqpYCAANv+evXq6e6777Zd58Uee+wxu5/vvPNOHTlyxHYPC+PBBx/UypUrlZ6erhUrVig9Pf2S0zXS+XUnF9ZM5OXl6ciRI7bpqB9//LHQ57Rarerdu3eh2rZu3Vr/+c9/NHbsWHXu3FllypTRW2+9VehzOVNcXJw8PT3t9jnjnlzqz7Ewfx/ff/99TZkyRWFhYZo7d66GDx+u2rVrq1WrVvrrr79s7ebMmaM777xT5cqV0+HDh21bdHS08vLytHr16kKNEyhJCCRwyNfXV5J04sSJQrX//fff5ebmZvekhCQFBwfL399fv//+u93+ypUrF+ijXLlyOnbs2FWOuKD7779fUVFR6tevn4KCgtS9e3d99tlnVwwnF8ZZs2bNAsdq166tw4cP69SpU3b7/34t5cqVk6QiXcs999wjHx8fzZ49Wx9//LFuu+22Avfygvz8fE2cOFE333yzrFarKlSooIoVK2rLli06fvx4oc95ww03FGkB6yuvvKKAgAClpqZq8uTJBdZHXMqhQ4eUnp5u206ePFno811OWFhYgX3/9J6UKVPGtnbjgsL+fXRzc1N8fLw2bdqkw4cP66uvvlLbtm21YsUKde/e3dZu9+7dWrJkiSpWrGi3RUdHS5IOHjzo8FxASUMggUO+vr4KDQ3Vzz//XKTP/X1R6eW4u7tfcr9hGFd9jr8vQPT09NTq1av1zTff6OGHH9aWLVt0//336+6773a4WLEo/sm1XGC1WtW5c2fNnDlTc+fOvWx1RJLGjx+voUOHqlmzZvroo4+0dOlSJScn65Zbbil0JUhSgSqDIz/99JPt/zS3bt1aqM/cdtttCgkJsW1X8z6Vv7vUuP/pPbncn2FRlS9fXvfee6++/vprNW/eXGvWrLGF3Pz8fN19991KTk6+5NalSxenjAH4N2FRKwqlXbt2mjFjhlJSUhQZGXnFtlWqVFF+fr52796t2rVr2/ZnZGQoMzPTtv
jPGcqVK2f3RMoFf6/CSOf/9dqqVSu1atVKr732msaPH6+nn35a3377re1fpn+/DknatWtXgWM7d+5UhQoV5OXl9c8
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Confusion matrix of the tuned model on the training split\n",
"cm_train_xg = confusion_matrix(y_train, best_xgb_model.predict(X_train))\n",
"\n",
"# Draw on an explicit Axes instead of the implicit pyplot state; the trailing semicolon\n",
"# keeps the return value of ax.set() out of the cell output.\n",
"fig, ax = plt.subplots()\n",
"sns.heatmap(cm_train_xg, annot=True, cmap='BuPu', fmt='g', linewidth=1.5, ax=ax)\n",
"ax.set(xlabel='Predicted', ylabel='Actual', title='Confusion Matrix - Train Set');"
]
},
{
"cell_type": "code",
"execution_count": 186,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.94 0.84 0.88 7226\n",
" 1 0.30 0.56 0.39 906\n",
"\n",
" accuracy 0.81 8132\n",
" macro avg 0.62 0.70 0.64 8132\n",
"weighted avg 0.87 0.81 0.83 8132\n",
"\n"
]
}
],
"source": [
"# Classification report (per-class precision / recall / F1) for the training split\n",
"train_report = classification_report(y_train, best_xgb_model.predict(X_train))\n",
"print(train_report)"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {},
"outputs": [],
"source": [
"# xgb.to_graphviz(best_xgb_model, num_trees=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dane testowe"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Confusion Matrix - Test Set')"
]
},
"execution_count": 188,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiQAAAHHCAYAAACPy0PBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABILElEQVR4nO3deVxUZf//8fcAMiDKprKZa7nvaRnuJne4ZG7lbVmiuZS55Jp637mXFJpbamab1q1lWZpZqSQplbhhqJmZlmml4IJIoALC+f3hz/k2oQ7oHEfx9exxHo/mnGvOuc7U1NvPdV1nLIZhGAIAAHAhN1d3AAAAgEACAABcjkACAABcjkACAABcjkACAABcjkACAABcjkACAABcjkACAABcjkACAABcjkCCW9aBAwf0wAMPyM/PTxaLRatWrXLq+X/77TdZLBYtXrzYqee9lbVq1UqtWrVydTcAFEEEElyXX375RU899ZQqV64sLy8v+fr6qmnTppozZ47OnTtn6rWjoqK0Z88evfjii3rvvffUqFEjU693I/Xu3VsWi0W+vr6X/RwPHDggi8Uii8WiGTNmFPr8R48e1aRJk5SUlOSE3ppr0qRJtnu92uasoPTFF19o0qRJBW6fl5end999V40bN1ZgYKBKliypqlWrqlevXtqyZUuhr3/27FlNmjRJGzduLPR7gVuZh6s7gFvX559/rkceeURWq1W9evVS7dq1lZ2drW+//VajR4/W3r17tWjRIlOufe7cOSUkJOi///2vBg8ebMo1KlSooHPnzqlYsWKmnN8RDw8PnT17Vp999pm6d+9ud2zp0qXy8vLS+fPnr+ncR48e1eTJk1WxYkXVr1+/wO9bv379NV3venTt2lV33XWX7XVGRoYGDhyoLl26qGvXrrb9wcHBTrneF198ofnz5xc4lAwdOlTz589Xp06d1LNnT3l4eGj//v368ssvVblyZd13332Fuv7Zs2c1efJkSaIahdsKgQTX5NChQ+rRo4cqVKiguLg4hYaG2o4NGjRIBw8e1Oeff27a9U+cOCFJ8vf3N+0aFotFXl5epp3fEavVqqZNm+r999/PF0iWLVumDh066OOPP74hfTl79qyKFy8uT0/PG3K9v6tbt67q1q1re33y5EkNHDhQdevW1eOPP37D+/N3KSkpWrBggfr3758vfM+ePdv27ykAxxiywTWJiYlRRkaG3nrrLbswcsldd92lZ5991vb6woULmjp1qu68805ZrVZVrFhR//nPf5SVlWX3vooVK+rBBx/Ut99+q3vvvVdeXl6qXLmy3n33XVubSZMmqUKFCpKk0aNHy2KxqGLFipIuDnVc+vu/u1T2/7vY2Fg1a9ZM/v7+KlGihKpVq6b//Oc/tuNXmkMSFxen5s2by8fHR/7+/urUqZP27dt32esdPHhQvXv3lr+/v/z8/NSnTx+dPXv2yh/sPzz22GP68ssvlZaWZtu3fft2HThwQI899li+9qmpqRo1apTq1KmjEiVKyNfXV+3atdOuXbtsbTZu3Kh77rlHktSnTx/bkMel+2zVqpVq166txMREtWjRQsWLF7d9Lv+cQxIVFSUvL6989x8ZGamAgAAdPXq0wPd6vX766Sc9/PDDCgwMlJeXlxo1aqTVq1fbtcnJydHkyZNVpUoVeXl5qVSpUmrWrJliY2MlXfz3Z/78+ZJkNxx0JYcOHZJhGGratGm+YxaLRUFBQXb70tLSNGzYMJUrV05Wq1V33XWXXn75ZeXl5Um6+O9cmTJlJEmTJ0+2Xb8wQ0jArYoKCa7JZ599psqVK6tJkyYFat+vXz8tWbJEDz/8sEaOHKmtW7cqOjpa+/bt08qVK+3aHjx4UA8//LD69u2rqKgovf322+rdu7caNmyoWrVqqWvXrvL399fw4cP16KOPqn379ipRokSh+r937149+OCDqlu3rqZMmSKr1aqDBw/qu+++u+r7vvrqK7Vr106VK1fWpE
mTdO7cOb366qtq2rSpdu7cmS8Mde/eXZUqVVJ0dLR27typN998U0FBQXr55ZcL1M+uXbvq6aef1ieffKInn3xS0sXqSPXq1XX33Xfna//rr79q1apVeuSRR1SpUiWlpKTo9ddfV8uWLfXjjz8qLCxMNWrU0JQpUzRhwgQNGDBAzZs3lyS7f5anTp1Su3bt1KNHDz3++ONXHA6ZM2eO4uLiFBUVpYSEBLm7u+v111/X+vXr9d577yksLKxA93m99u7dq6ZNm6ps2bIaO3asfHx89OGHH6pz5876+OOP1aVLF0kXg2J0dLT69eune++9V+np6dqxY4d27typf/3rX3rqqad09OhRxcbG6r333nN43UvB+KOPPtIjjzyi4sWLX7Ht2bNn1bJlS/3555966qmnVL58eW3evFnjxo3TsWPHNHv2bJUpU0avvfZaviGpv1eIgCLLAArpzJkzhiSjU6dOBWqflJRkSDL69etnt3/UqFGGJCMuLs62r0KFCoYkIz4+3rbv+PHjhtVqNUaOHGnbd+jQIUOSMX36dLtzRkVFGRUqVMjXh4kTJxp//9d91qxZhiTjxIkTV+z3pWu88847tn3169c3goKCjFOnTtn27dq1y3BzczN69eqV73pPPvmk3Tm7dOlilCpV6orX/Pt9+Pj4GIZhGA8//LDRpk0bwzAMIzc31wgJCTEmT5582c/g/PnzRm5ubr77sFqtxpQpU2z7tm/fnu/eLmnZsqUhyVi4cOFlj7Vs2dJu37p16wxJxgsvvGD8+uuvRokSJYzOnTs7vMdrdeLECUOSMXHiRNu+Nm3aGHXq1DHOnz9v25eXl2c0adLEqFKlim1fvXr1jA4dOlz1/IMGDTIK85/GXr16GZKMgIAAo0uXLsaMGTOMffv25Ws3depUw8fHx/j555/t9o8dO9Zwd3c3jhw5csX7A24HDNmg0NLT0yVJJUuWLFD7L774QpI0YsQIu/0jR46UpHxzTWrWrGn7U7sklSlTRtWqVdOvv/56zX3+p0tzTz799FNbudyRY8eOKSkpSb1791ZgYKBtf926dfWvf/3Ldp9/9/TTT9u9bt68uU6dOmX7DAviscce08aNG5WcnKy4uDglJydfdrhGujjvxM3t4tc6NzdXp06dsg1H7dy5s8DXtFqt6tOnT4HaPvDAA3rqqac0ZcoUde3aVV5eXnr99dcLfK3rlZqaqri4OHXv3l1//fWXTp48qZMnT+rUqVOKjIzUgQMH9Oeff0q6+M997969OnDggNOu/84772jevHmqVKmSVq5cqVGjRqlGjRpq06aN7brSxSpK8+bNFRAQYOvjyZMnFRERodzcXMXHxzutT8CtiECCQvP19ZUk/fXXXwVqf/jwYbm5udmtlJCkkJAQ+fv76/Dhw3b7y5cvn+8cAQEBOn369DX2OL9///vfatq0qfr166fg4GD16NFDH3744VXDyaV+VqtWLd+xGjVq6OTJk8rMzLTb/897CQgIkKRC3Uv79u1VsmRJLV++XEuXLtU999yT77O8JC8vT7NmzVKVKlVktVpVunRplSlTRrt379aZM2cKfM2yZcsWagLrjBkzFBgYqKSkJM2dOzff3InLOXHihJKTk21bRkZGga/3dwcPHpRhGBo/frzKlCljt02cOFGSdPz4cUnSlClTlJaWpqpVq6pOnToaPXq0du/efU3XvcTNzU2DBg1SYmKiTp48qU8//VTt2rVTXFycevToYWt34MABrV27Nl8fIyIi7PoI3K6YQ4JC8/X1VVhYmH744YdCve9qkwP/zt3d/bL7DcO45mvk5ubavfb29lZ8fLy+/vprff7551q7dq2WL1+u+++/X+vXr79iHwrreu7lEqvVqq5du2rJkiX69ddfrzrBcdq0aRo/fryefPJJTZ06VYGBgXJzc9OwYcMKXAmSLn4+hfH999/b/oe6Z88ePfroow7fc88999iF0YkTJ17T5M1L9zVq1ChFRkZets2lANeiRQ
v98ssv+vTTT7V+/Xq9+eabmjVrlhYuXKh+/foV+tr/VKpUKT300EN66KGH1KpVK23atEmHDx9WhQoVlJeXp3/9619
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Predict on the held-out test split with the tuned model\n",
"y_pred_xgb = best_xgb_model.predict(X_test)\n",
"\n",
"# Confusion matrix for the testing data\n",
"cm_test_xgb = confusion_matrix(y_test, y_pred_xgb)\n",
"\n",
"# Draw on an explicit Axes instead of the implicit pyplot state; the trailing semicolon\n",
"# keeps the return value of ax.set() out of the cell output.\n",
"fig, ax = plt.subplots()\n",
"sns.heatmap(cm_test_xgb, annot=True, cmap='Blues', fmt='g', linewidth=1.5, ax=ax)\n",
"ax.set(xlabel='Predicted', ylabel='Actual', title='Confusion Matrix - Test Set');"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.93 0.84 0.88 1797\n",
" 1 0.30 0.50 0.37 236\n",
"\n",
" accuracy 0.80 2033\n",
" macro avg 0.61 0.67 0.63 2033\n",
"weighted avg 0.85 0.80 0.82 2033\n",
"\n"
]
}
],
"source": [
"# Classification report (per-class precision / recall / F1) for the test split\n",
"test_report = classification_report(y_test, y_pred_xgb)\n",
"print(test_report)"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Zbiór danych testowych zawiera 2033 oddane strzały, gdzie 236 to strzały trafione.\n",
"Dokładność klasyfikacji, czy strzał jest bramką, czy nie, wynosi 0.80%.\n",
"klasyfikator uzyskał ROC-AUC na poziomie 0.75%.\n"
]
}
],
"source": [
"# Summarise hold-out performance.\n",
"# Accuracy uses the `%` format type, which multiplies the fraction by 100 before appending '%'\n",
"# (the former ':.2f}%' printed the raw fraction, e.g. '0.80%').\n",
"# ROC-AUC is a score between 0 and 1, not a percentage, so it is printed without '%'.\n",
"print(f'Zbiór danych testowych zawiera {len(y_test)} oddane strzały, gdzie {y_test.sum()[\"isGoal\"]} to strzały trafione.')\n",
"print(f'Dokładność klasyfikacji, czy strzał jest bramką, czy nie, wynosi {best_xgb_model.score(X_test, y_test):.2%}.')\n",
"print(f'klasyfikator uzyskał ROC-AUC na poziomie {roc_auc_score(y_test, best_xgb_model.predict_proba(X_test)[:, 1]):.2f}.')"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAwIAAAHHCAYAAAAMBu+WAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABbpUlEQVR4nO3de3zP9f//8ft7Z9vsiM00DHO2EVnOyrSNJCmnlUMo2pJjUbGhIsJyiD4U+aTUpyQJnxzCJ+djHx+0coocEjnNwg6v3x9+e3+9bWPWZt573a6Xyy68n6/T4/F+vc37/n4d3hbDMAwBAAAAMBWHoi4AAAAAwN1HEAAAAABMiCAAAAAAmBBBAAAAADAhggAAAABgQgQBAAAAwIQIAgAAAIAJEQQAAAAAEyIIAAAAACZEEAAAoBiYN2+eLBaLjhw5UtSlALATBAEAgF3KeuOb08/w4cMLZZsbN25UYmKizp8/XyjrN7PU1FQlJiZq7dq1RV0KYBpORV0AAAB/x5gxYxQSEmIzVrt27ULZ1saNGzV69Gj17NlTPj4+hbKN/HrmmWfUpUsXubq6FnUp+ZKamqrRo0dLklq2bFm0xQAmQRAAANi1mJgYNWjQoKjL+FsuX74sDw+Pv7UOR0dHOTo6FlBFd09mZqauXbtW1GUApsSpQQCAYm358uVq1qyZPDw8VLJkSbVt21Z79+61mee///2vevbsqUqVKsnNzU2BgYF69tlndfbsWes8iYmJGjZsmCQpJCTEehrSkSNHdOTIEVksFs2bNy/b9i0WixITE23WY7FYtG/fPnXr1k2+vr5q2rSpdfrHH3+s+vXrq0SJEvLz81OXLl107Nix2/aZ0zUCFStW1KOPPqq1a9eqQYMGKlGihOrUqWM9/WbRokWqU6eO3NzcVL9+fe3atctmnT179pSnp6cOHTqkqKgoeXh4KCgoSGPGjJFhGDbzXr58WUOGDFFwcLBcXV1VrVo1vfPOO9nms1gsio+P14IFC1SrVi25urpq1qxZKl26tCRp9OjR1uc263nLy/658bk9cOCA9aiNt7e3evXqpdTU1GzP2ccff6yGDRvK3d1dvr6+at68ub777jubefLy+gHsFUcEAAB27cKFCzpz5ozNWKlSpSRJ//znP9WjRw9FRUXp7bffVmpqqmbOnKmmTZtq165dqlixoiRp5cqVOnTokHr16qXAwEDt3btX//jHP7R3715t3rxZFotFTzzxhH7++Wd9+umnmjJlinUbpUuX1h9//HHHdT/11FMKDQ3VW2+9ZX2z/Oabb2rkyJHq1KmT+vTpoz/++EPTpk1T8+bNtWvXrnydjnTgwAF169ZNzz//vJ5++mm98847ateunWbNmqVXX31VL7zwgiRp3Lhx6tSpk5KTk+Xg8H+fE2ZkZCg6OloPPvigJkyYoBUrVighIUHp6ekaM2aMJMkwDD322GP6/vvv1bt3b9WtW1f//ve/NWzYMB0/flxTpkyxqWnNmjX6/PPPFR8fr1KlSik8PFwzZ85U//791aFDBz3xxBOSpLCwMEl52z836tSpk0JCQjRu3Djt3LlTc+bMUZkyZfT2229b5xk9erQSExPVuHFjjRkzRi4uLtqyZYvWrFmjRx55RFLeXz+A3TIAALBDc+fONSTl+GMYhnHp0iXDx8fH6Nu3r81yp06dMry9vW3GU1NTs63/008/NSQZ69evt45NnDjRkGQcPnzYZt7Dhw8bkoy5c+dmW48kIyEhwfo4ISHBkGR07drVZr4jR44Yjo6OxptvvmkzvmfPHsPJySnbeG7Px421VahQwZBkbNy40Tr273//25BklChRwvj111+t4++//74hyfj++++tYz169DAkGS+++KJ1LDMz02jbtq3h4uJi/PHHH4ZhGMbixYsNScYbb7xhU9OTTz5pWCwW48CBAzbPh4ODg7F3716bef/4449sz1WWvO6frOf22WeftZm3Q4cOhr+/v/XxL7
/8Yjg4OBgdOnQwMjIybObNzMw0DOPOXj+AveLUIACAXZsxY4ZWrlxp8yNd/xT5/Pnz6tq1q86cOWP9cXR0VEREhL7//nvrOkqUKGH9+5UrV3TmzBk9+OCDkqSdO3cWSt39+vWzebxo0SJlZmaqU6dONvUGBgYqNDTUpt47UbNmTTVq1Mj6OCIiQpL08MMPq3z58tnGDx06lG0d8fHx1r9nndpz7do1rVq1SpK0bNkyOTo6asCAATbLDRkyRIZhaPny5TbjLVq0UM2aNfPcw53un5uf22bNmuns2bO6ePGiJGnx4sXKzMzUqFGjbI5+ZPUn3dnrB7BXnBoEALBrDRs2zPFi4V9++UXS9Te8OfHy8rL+/c8//9To0aO1cOFCnT592ma+CxcuFGC1/+fmOx398ssvMgxDoaGhOc7v7Oycr+3c+GZfkry9vSVJwcHBOY6fO3fOZtzBwUGVKlWyGatataokWa9H+PXXXxUUFKSSJUvazFejRg3r9Bvd3Pvt3On+ublnX19fSdd78/Ly0sGDB+Xg4HDLMHInrx/AXhEEAADFUmZmpqTr53kHBgZmm+7k9H//BXbq1EkbN27UsGHDVLduXXl6eiozM1PR0dHW9dzKzeeoZ8nIyMh1mRs/5c6q12KxaPny5Tne/cfT0/O2deQktzsJ5TZu3HRxb2G4uffbudP9UxC93cnrB7BXvIoBAMVS5cqVJUllypRRZGRkrvOdO3dOq1ev1ujRozVq1CjreNYnwjfK7Q1/1ifON3/R2M2fhN+uXsMwFBISYv3E/V6QmZmpQ4cO2dT0888/S5L1YtkKFSpo1apVunTpks1RgZ9++sk6/XZye27vZP/kVeXKlZWZmal9+/apbt26uc4j3f71A9gzrhEAABRLUVFR8vLy0ltvvaW0tLRs07Pu9JP16fHNnxYnJSVlWybrXv83v+H38vJSqVKltH79epvx9957L8/1PvHEE3J0dNTo0aOz1WIYRrZbZd5N06dPt6ll+vTpcnZ2VqtWrSRJbdq0UUZGhs18kjRlyhRZLBbFxMTcdhvu7u6Ssj+3d7J/8urxxx+Xg4ODxowZk+2IQtZ28vr6AewZRwQAAMWSl5eXZs6cqWeeeUb333+/unTpotKlS+vo0aP69ttv1aRJE02fPl1eXl5q3ry5JkyYoLS0NJUrV07fffedDh8+nG2d9evXlyS99tpr6tKli5ydndWuXTt5eHioT58+Gj9+vPr06aMGDRpo/fr11k/O86Jy5cp64403NGLECB05ckSPP/64SpYsqcOHD+urr77Sc889p6FDhxbY85NXbm5uWrFihXr06KGIiAgtX75c3377rV599VXrvf/btWunhx56SK+99pqOHDmi8PBwfffdd/r66681cOBA66frt1KiRAnVrFlTn332mapWrSo/Pz/Vrl1btWvXzvP+yasqVarotdde09ixY9WsWTM98cQTcnV11bZt2xQUFKRx48bl+fUD2LUiulsRAAB/S9btMrdt23bL+b7//nsjKirK8Pb2Ntzc3IzKlSsbPXv2NLZv326d57fffjM6dOhg+Pj4GN7e3sZTTz1lnDhxIsfbWY4dO9YoV66c4eDgYHO7ztTUVKN3796Gt7e3UbJkSaNTp07G6dOnc719aNatN2/25ZdfGk2bNjU8PDwMDw8Po3r16kZcXJyRnJycp+fj5tuHtm3bNtu8koy4uDibsaxboE6cONE61qNHD8PDw8M4ePCg8cgjjxju7u5GQECAkZCQkO22m5cuXTIGDRpkBAUFGc7OzkZoaKgxceJE6+04b7XtLBs3bjTq169vuLi42Dxved0/uT23OT03hmEYH374oVGvXj3D1dXV8PX1NVq0aGGsXLnSZp68vH4Ae2UxjLtwVRAAALA7PXv21BdffKGUlJSiLgVAIeAaAQAAAMCECAIAAACACREEAAAAABPiGgEAAADAhDgiAAAAAJgQQQAAAAAwIb5QDECOMjMzdeLECZUsWV
IWi6WoywEAAHlgGIYuXbqkoKAgOTjc+jN/ggCAHJ04cULBwcFFXQYAAMiHY8eO6b777rvlPAQBADkqWbKkJOnw4cP
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot feature importance of the tuned model on an explicit Axes.\n",
"# `xgb` is the xgboost module alias bound in the imports cell at the top of the notebook;\n",
"# without that import this cell raises NameError on a fresh kernel.\n",
"fig, ax = plt.subplots()\n",
"xgb.plot_importance(best_xgb_model, ax=ax)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1UAAAHHCAYAAABXznKnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAB+WUlEQVR4nO3dd1QU198G8GfpvUoRRUCpFuwi9oICGizR2Ii9K7bYG4INRVR+lqARI2I30diiKKCYWKKIvaEodtFYEBGl7bx/cJjXkUXRNaLwfM7hxJ25O3u/sxvYZ++duzJBEAQQERERERHRJ1Ep7g4QERERERF9yxiqiIiIiIiIlMBQRUREREREpASGKiIiIiIiIiUwVBERERERESmBoYqIiIiIiEgJDFVERERERERKYKgiIiIiIiJSAkMVERERERGREhiqiIiISoCIiAjIZDLcunWruLtCRFTqMFQREdE3KT9EKPqZNGnSf/KYx44dQ0BAAFJTU/+T45dmGRkZCAgIQFxcXHF3hYjoo6kVdweIiIiUMXPmTNjZ2Um2Va1a9T95rGPHjiEwMBB9+vSBkZHRf/IYn6pnz57o1q0bNDU1i7srnyQjIwOBgYEAgGbNmhVvZ4iIPhJDFRERfdO8vb1Rp06d4u6GUl69egVdXV2ljqGqqgpVVdXP1KMvRy6XIysrq7i7QUSkFE7/IyKiEm3fvn1o3LgxdHV1oa+vj7Zt2+LSpUuSNufPn0efPn1QsWJFaGlpwdLSEv369cPTp0/FNgEBARg/fjwAwM7OTpxqeOvWLdy6dQsymQwREREFHl8mkyEgIEByHJlMhsuXL6NHjx4wNjZGo0aNxP3r169H7dq1oa2tDRMTE3Tr1g137979YJ2KrqmytbXFd999h7i4ONSpUwfa2tqoVq2aOMVu+/btqFatGrS0tFC7dm2cOXNGcsw+ffpAT08PN2/ehKenJ3R1dWFlZYWZM2dCEARJ21evXmHs2LGwtraGpqYmnJycEBISUqCdTCaDn58fNmzYgCpVqkBTUxMrVqyAmZkZACAwMFA8t/nnrSjPz9vnNikpSRxNNDQ0RN++fZGRkVHgnK1fvx716tWDjo4OjI2N0aRJExw4cEDSpiivHyIijlQREdE37cWLF3jy5IlkW5kyZQAA69atQ+/eveHp6Yn58+cjIyMDYWFhaNSoEc6cOQNbW1sAQHR0NG7evIm+ffvC0tISly5dwi+//IJLly7hn3/+gUwmw/fff49r165h06ZNWLx4sfgYZmZm+Pfffz+63z/88AMcHBwwd+5cMXjMmTMH06dPR5cuXTBgwAD8+++/WLp0KZo0aYIzZ8580pTDpKQk9OjRA4MHD8aPP/6IkJAQ+Pj4YMWKFZgyZQqGDRsGAAgKCkKXLl2QmJgIFZX//8w1NzcXXl5eqF+/PoKDgxEVFYUZM2YgJycHM2fOBAAIgoB27drh0KFD6N+/P2rUqIH9+/dj/PjxuH//PhYvXizp08GDB7F161b4+fmhTJkyqF69OsLCwjB06FB07NgR33//PQDA1dUVQNGen7d16dIFdnZ2CAoKwunTpxEeHg5zc3PMnz9fbBMYGIiAgAA0aNAAM2fOhIaGBk6cOIGDBw+idevWAIr++iEigkBERPQNWrNmjQBA4Y8gCMLLly8FIyMjYeDAgZL7paSkCIaGhpLtGRkZBY6/adMmAYDw119/idsWLFggABCSk5MlbZOTkwUAwpo1awocB4AwY8YM8faMGTMEAEL37t0l7W7duiWoqqoKc+bMkWy/cOGCoKamVmB7Yefj7b7Z2NgIAIRjx46J2/bv3y8AELS1tYXbt2+L21euXCkAEA4dOiRu6927twBAGDFihLhNLpcLbdu2FTQ0NIR///1XEARB2LFjhwBAmD17tqRPnTt3FmQymZCUlCQ5HyoqKsKlS5ckbf/9998C5ypfUZ+f/HPbr18/SduOHTsKpqam4u3r168LKioqQs
eOHYXc3FxJW7lcLgjCx71+iIg4/Y+IiL5py5cvR3R0tOQHyBvdSE1NRffu3fHkyRPxR1VVFW5ubjh06JB4DG1tbfHfb968wZMnT1C/fn0AwOnTp/+Tfg8ZMkRye/v27ZDL5ejSpYukv5aWlnBwcJD092NUrlwZ7u7u4m03NzcAQIsWLVChQoUC22/evFngGH5+fuK/86fvZWVlISYmBgCwd+9eqKqqYuTIkZL7jR07FoIgYN++fZLtTZs2ReXKlYtcw8c+P++e28aNG+Pp06dIS0sDAOzYsQNyuRz+/v6SUbn8+oCPe/0QEXH6HxERfdPq1auncKGK69evA8gLD4oYGBiI/3727BkCAwOxefNmPH78WNLuxYsXn7G3/+/dFQuvX78OQRDg4OCgsL26uvonPc7bwQkADA0NAQDW1tYKtz9//lyyXUVFBRUrVpRsc3R0BADx+q3bt2/DysoK+vr6knYuLi7i/re9W/uHfOzz827NxsbGAPJqMzAwwI0bN6CiovLeYPcxrx8iIoYqIiIqkeRyOYC862IsLS0L7FdT+/8/gV26dMGxY8cwfvx41KhRA3p6epDL5fDy8hKP8z7vXtOTLzc3t9D7vD36kt9fmUyGffv2KVzFT09P74P9UKSwFQEL2y68s7DEf+Hd2j/kY5+fz1Hbx7x+iIj4G4GIiEqkSpUqAQDMzc3h4eFRaLvnz58jNjYWgYGB8Pf3F7fnj1S8rbDwlD8S8u6XAr87QvOh/gqCADs7O3Ek6Gsgl8tx8+ZNSZ+uXbsGAOJCDTY2NoiJicHLly8lo1VXr14V939IYef2Y56foqpUqRLkcjkuX76MGjVqFNoG+PDrh4gI4JLqRERUQnl6esLAwABz585FdnZ2gf35K/blj2q8O4oRGhpa4D753yX1bngyMDBAmTJl8Ndff0m2//zzz0Xu7/fffw9VVVUEBgYW6IsgCAWWD/+Sli1bJunLsmXLoK6ujpYtWwIA2rRpg9zcXEk7AFi8eDFkMhm8vb0/+Bg6OjoACp7bj3l+iqpDhw5QUVHBzJkzC4x05T9OUV8/REQAR6qIiKiEMjAwQFhYGHr27IlatWqhW7duMDMzw507d/Dnn3+iYcOGWLZsGQwMDNCkSRMEBwcjOzsb5cqVw4EDB5CcnFzgmLVr1wYATJ06Fd26dYO6ujp8fHygq6uLAQMGYN68eRgwYADq1KmDv/76SxzRKYpKlSph9uzZmDx5Mm7duoUOHTpAX18fycnJ+OOPPzBo0CCMGzfus52fotLS0kJUVBR69+4NNzc37Nu3D3/++SemTJkifreUj48PmjdvjqlTp+LWrVuoXr06Dhw4gJ07d2L06NHiqM/7aGtro3LlytiyZQscHR1hYmKCqlWromrVqkV+forK3t4eU6dOxaxZs9C4cWN8//330NTURHx8PKysrBAUFFTk1w8REcBQRUREJViPHj1gZWWFefPmYcGCBcjMzES5cuXQuHFj9O3bV2y3ceNGjBgxAsuXL4cgCGjdujX27dsHKysryfHq1q2LWbNmYcWKFYiKioJcLkdycjJ0dXXh7++Pf//9F7///ju2bt0Kb29v7Nu3D+bm5kXu76RJk+Do6IjFixcjMDAQQN6CEq1bt0a7du0+z0n5SKqqqoiKisLQoUMxfvx46OvrY8aMGZKpeCoqKti1axf8/f2xZcsWrFmzBra2tliwYAHGjh1b5McKDw/HiBEjMGbMGGRlZWHGjBmoWrVqkZ+fjzFz5kzY2dlh6dKlmDp1KnR0dODq6oqePXuKbYr6+iEikglf4opUIiIi+ub06dMHv//+O9LT04u7K0REXzVeU0VERERERKQEhioiIiIiIiIlMFQREREREREpgddUERERERERKYEjVUREREREREpgqCIiIiIiIlICv6eKiBSSy+V48OAB9PX1IZPJirs7REREVASCIODly5ewsrKCigrHT74UhioiUujBgwewtrYu7m4QERHRJ7h79y7Kly
9f3N0oNRiqiEghfX19AEBycjJMTEyKuTdfVnZ2Ng4cOIDWrVtDXV29uLvzRbF21l6aai+tdQOsvSTXnpaWBmtra/H
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"xgb.plot_importance(best_xgb_model, importance_type='gain', xlabel='Gain')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 194,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAwIAAAHHCAYAAAAMBu+WAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABcZklEQVR4nO3de3zP9f//8ft7Zzubw2YaG+YwhxFZDjlkckrSwWnlECuy5FhUGCpCyCH6UKSU+pQk4ZPTKOdjHx+0IiKHRE4zh23v1+8Pv72/3jazzWbee92ul8suvJ+v0+Pxfr3N+/5+Hd4WwzAMAQAAADAVp4IuAAAAAMDdRxAAAAAATIggAAAAAJgQQQAAAAAwIYIAAAAAYEIEAQAAAMCECAIAAACACREEAAAAABMiCAAAAAAmRBAAAKAQmDdvniwWiw4fPlzQpQBwEAQBAIBDSn/jm9nP0KFD82WbGzduVHx8vM6dO5cv6zez5ORkxcfHKyEhoaBLAUzDpaALAADgTowePVphYWF2Y9WqVcuXbW3cuFGjRo1S9+7d5e/vny/byK1nn31WnTp1kru7e0GXkivJyckaNWqUJKlJkyYFWwxgEgQBAIBDa9WqlerUqVPQZdyRS5cuycvL647W4ezsLGdn5zyq6O6xWq26du1aQZcBmBKnBgEACrXly5froYcekpeXl3x8fNSmTRvt3bvXbp7//ve/6t69u8qVKycPDw8FBQXpueee05kzZ2zzxMfHa8iQIZKksLAw22lIhw8f1uHDh2WxWDRv3rwM27dYLIqPj7dbj8Vi0b59+9SlSxcVLVpUDRs2tE3/9NNPVbt2bRUpUkQBAQHq1KmTjh49ets+M7tGIDQ0VI8++qgSEhJUp04dFSlSRNWrV7edfrNo0SJVr15dHh4eql27tnbt2mW3zu7du8vb21u///67WrRoIS8vLwUHB2v06NEyDMNu3kuXLmnQoEEKCQmRu7u7KlWqpIkTJ2aYz2KxKC4uTgsWLFDVqlXl7u6uWbNmqUSJEpKkUaNG2Z7b9OctO/vnxuf2wIEDtqM2fn5+6tGjh5KTkzM8Z59++qnq1q0rT09PFS1aVI0aNdIPP/xgN092Xj+Ao+KIAADAoZ0/f16nT5+2GytevLgk6ZNPPlG3bt3UokULvfPOO0pOTtbMmTPVsGFD7dq1S6GhoZKklStX6vfff1ePHj0UFBSkvXv36l//+pf27t2rzZs3y2Kx6IknntCvv/6qzz//XJMnT7Zto0SJEvr7779zXPfTTz+t8PBwvf3227Y3y2+99ZaGDx+uDh06qFevXvr77781bdo0NWrUSLt27crV6UgHDhxQly5d9MILL+iZZ57RxIkT1bZtW82aNUuvvfaaXnzxRUnS2LFj1aFDByUmJsrJ6f8+J0xLS1PLli314IMPavz48VqxYoVGjhyp1NRUjR49WpJkGIYee+wxrV27Vj179lTNmjX1n//8R0OGDNGxY8c0efJku5rWrFmjL7/8UnFxcSpevLgiIyM1c+ZM9enTR+3bt9cTTzwhSapRo4ak7O2fG3Xo0EFhYWEaO3asdu7cqTlz5qhkyZJ65513bPOMGjVK8fHxql+/vkaPHi03Nzdt2bJFa9as0SOPPCIp+68fwGEZAAA4oLlz5xqSMv0xDMO4ePGi4e/vb8TGxtotd/LkScPPz89uPDk5OcP6P//8c0OSsX79etvYhAkTDEnGoUOH7OY9dOiQIcmYO3duhvVIMkaOHGl7PHLkSEOS0blzZ7v5Dh8+bDg7OxtvvfWW3fiePXsMFxeXDOO3ej5urK1s2bKGJGPjxo22sf/85z+GJKNIkSLGH3/8YRv/4IMPDEnG2rVrbWPdunUzJBkvvfSSbcxqtRpt2rQx3NzcjL///tswDMNYvHixIcl488037Wp66qmnDIvFYhw4cMDu+XBycjL27t1rN+/ff/+d4blKl939k/7cPvfcc3bztm/f3ihWrJjt8W
+//WY4OTkZ7du3N9LS0uzmtVqthmHk7PUDOCpODQIAOLQZM2Zo5cqVdj/S9U+Rz507p86dO+v06dO2H2dnZ0VFRWnt2rW2dRQpUsT29ytXruj06dN68MEHJUk7d+7Ml7p79+5t93jRokWyWq3q0KGDXb1BQUEKDw+3qzcnIiIiVK9ePdvjqKgoSdLDDz+sMmXKZBj//fffM6wjLi7O9vf0U3uuXbumVatWSZKWLVsmZ2dn9evXz265QYMGyTAMLV++3G68cePGioiIyHYPOd0/Nz+3Dz30kM6cOaMLFy5IkhYvXiyr1aoRI0bYHf1I70/K2esHcFScGgQAcGh169bN9GLh3377TdL1N7yZ8fX1tf39n3/+0ahRo7Rw4UKdOnXKbr7z58/nYbX/5+Y7Hf32228yDEPh4eGZzu/q6pqr7dz4Zl+S/Pz8JEkhISGZjp89e9Zu3MnJSeXKlbMbq1ixoiTZrkf4448/FBwcLB8fH7v5qlSpYpt+o5t7v52c7p+bey5atKik6735+vrq4MGDcnJyyjKM5OT1AzgqggAAoFCyWq2Srp/nHRQUlGG6i8v//RfYoUMHbdy4UUOGDFHNmjXl7e0tq9Wqli1b2taTlZvPUU+XlpZ2y2Vu/JQ7vV6LxaLly5dnevcfb2/v29aRmVvdSehW48ZNF/fmh5t7v52c7p+86C0nrx/AUfEqBgAUSuXLl5cklSxZUtHR0bec7+zZs1q9erVGjRqlESNG2MbTPxG+0a3e8Kd/4nzzF43d/En47eo1DENhYWG2T9zvBVarVb///rtdTb/++qsk2S6WLVu2rFatWqWLFy/aHRX45ZdfbNNv51bPbU72T3aVL19eVqtV+/btU82aNW85j3T71w/gyLhGAABQKLVo0UK+vr56++23lZKSkmF6+p1+0j89vvnT4ilTpmRYJv1e/ze/4ff19VXx4sW1fv16u/H3338/2/U+8cQTcnZ21qhRozLUYhhGhltl3k3Tp0+3q2X69OlydXVVs2bNJEmtW7dWWlqa3XySNHnyZFksFrVq1eq22/D09JSU8bnNyf7Jrscff1xOTk4aPXp0hiMK6dvJ7usHcGQcEQAAFEq+vr6aOXOmnn32Wd1///3q1KmTSpQooSNHjuj7779XgwYNNH36dPn6+qpRo0YaP368UlJSVLp0af3www86dOhQhnXWrl1bkvT666+rU6dOcnV1Vdu2beXl5aVevXpp3Lhx6tWrl+rUqaP169fbPjnPjvLly+vNN9/UsGHDdPjwYT3++OPy8fHRoUOH9M033+j555/X4MGD8+z5yS4PDw+tWLFC3bp1U1RUlJYvX67vv/9er732mu3e/23btlXTpk31+uuv6/Dhw4qMjNQPP/ygb7/9Vv3797d9up6VIkWKKCIiQl988YUqVqyogIAAVatWTdWqVcv2/smuChUq6PXXX9eYMWP00EMP6YknnpC7u7u2bdum4OBgjR07NtuvH8ChFdDdigAAuCPpt8vctm1blvOtXbvWaNGiheHn52d4eHgY5cuXN7p3725s377dNs+ff/5ptG/f3vD39zf8/PyMp59+2jh+/Himt7McM2aMUbp0acPJycnudp3JyclGz549DT8/P8PHx8fo0KGDcerUqVvePjT91ps3+/rrr42GDRsaXl5ehpeXl1G5cmWjb9++RmJiYraej5tvH9qmTZsM80oy+vbtazeWfgvUCRMm2Ma6detmeHl5GQcPHjQeeeQRw9PT0wgMDDRGjhyZ4babFy9eNAYMGGAEBwcbrq6uRnh4uDFhwgTb7Tiz2na6jRs3GrVr1zbc3Nzsnrfs7p9bPbeZPTeGYRgfffSRUatWLcPd3d0oWrSo0bhxY2PlypV282Tn9QM4Koth3IWrggAAgMPp3r27vvrqKyUlJRV0KQDyAdcIAAAAACZEEAAAAABMiCAAAAAAmBDXCAAAAAAmxBEBAAAAwIQIAgAAAIAJ8YViADJltVp1/Phx+fj4yG
KxFHQ5AAAgGwzD0MWLFxUcHCwnp6w/8ycIAMjU8ePHFRISUtBlAACAXDh69Kjuu+++LOchCADIlI+PjyTp0KFDCgg
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"xgb.plot_importance(best_xgb_model, importance_type='weight', xlabel='Weight')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Podsumowanie"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {},
"outputs": [],
"source": [
"prec_xgb_train = precision_score(y_train, best_xgb_model.predict(X_train))\n",
"prec_xgb_test = precision_score(y_test, y_pred_xgb)\n",
"rec_xgb_train = recall_score(y_train, best_xgb_model.predict(X_train))\n",
"rec_xgb_test = recall_score(y_test, y_pred_xgb)\n",
"acc_xgb_train = accuracy_score(y_train, best_xgb_model.predict(X_train))\n",
"acc_xgb_test = accuracy_score(y_test, y_pred_xgb)\n",
"train_time = xgb_training_time/60"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_c3a1d_row0_col0, #T_c3a1d_row0_col1, #T_c3a1d_row0_col2, #T_c3a1d_row0_col3, #T_c3a1d_row0_col4, #T_c3a1d_row0_col5, #T_c3a1d_row0_col6 {\n",
" font-weight: bold;\n",
" border: 2.0px solid grey;\n",
" color: white;\n",
"}\n",
"</style>\n",
"<table id=\"T_c3a1d\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_c3a1d_level0_col0\" class=\"col_heading level0 col0\" >Training Accuracy</th>\n",
" <th id=\"T_c3a1d_level0_col1\" class=\"col_heading level0 col1\" >Training Precision</th>\n",
" <th id=\"T_c3a1d_level0_col2\" class=\"col_heading level0 col2\" >Training Recall</th>\n",
" <th id=\"T_c3a1d_level0_col3\" class=\"col_heading level0 col3\" >Testing Accuracy</th>\n",
" <th id=\"T_c3a1d_level0_col4\" class=\"col_heading level0 col4\" >Testing Precision</th>\n",
" <th id=\"T_c3a1d_level0_col5\" class=\"col_heading level0 col5\" >Testing Recall</th>\n",
" <th id=\"T_c3a1d_level0_col6\" class=\"col_heading level0 col6\" >Training Time (mins)</th>\n",
" </tr>\n",
" <tr>\n",
" <th class=\"index_name level0\" >Model Name</th>\n",
" <th class=\"blank col0\" >&nbsp;</th>\n",
" <th class=\"blank col1\" >&nbsp;</th>\n",
" <th class=\"blank col2\" >&nbsp;</th>\n",
" <th class=\"blank col3\" >&nbsp;</th>\n",
" <th class=\"blank col4\" >&nbsp;</th>\n",
" <th class=\"blank col5\" >&nbsp;</th>\n",
" <th class=\"blank col6\" >&nbsp;</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_c3a1d_level0_row0\" class=\"row_heading level0 row0\" >XG Boost</th>\n",
" <td id=\"T_c3a1d_row0_col0\" class=\"data row0 col0\" >0.806</td>\n",
" <td id=\"T_c3a1d_row0_col1\" class=\"data row0 col1\" >0.300</td>\n",
" <td id=\"T_c3a1d_row0_col2\" class=\"data row0 col2\" >0.556</td>\n",
" <td id=\"T_c3a1d_row0_col3\" class=\"data row0 col3\" >0.803</td>\n",
" <td id=\"T_c3a1d_row0_col4\" class=\"data row0 col4\" >0.296</td>\n",
" <td id=\"T_c3a1d_row0_col5\" class=\"data row0 col5\" >0.504</td>\n",
" <td id=\"T_c3a1d_row0_col6\" class=\"data row0 col6\" >11.291</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x163c80a91b0>"
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Creating of dataframe of summary results\n",
"summary_df = pd.DataFrame({'Model Name':['XG Boost'],\n",
" 'Training Accuracy': acc_xgb_train, \n",
" 'Training Precision': prec_xgb_train,\n",
" 'Training Recall':rec_xgb_train,\n",
" 'Testing Accuracy': acc_xgb_test, \n",
" 'Testing Precision': prec_xgb_test,\n",
" 'Testing Recall':rec_xgb_test,\n",
" 'Training Time (mins)': train_time})\n",
"\n",
"summary_df.set_index('Model Name', inplace=True)\n",
"# Displaying summary of results\n",
"summary_df.style.format(precision =3).set_properties(**{'font-weight': 'bold',\n",
" 'border': '2.0px solid grey','color': 'white'})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Zapisywanie modelu"
]
},
{
"cell_type": "code",
"execution_count": 197,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['xgboost.joblib']"
]
},
"execution_count": 197,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from joblib import dump\n",
"dump(best_xgb_model, 'xgboost.joblib') "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Wczytywanie modelu"
]
},
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {},
   "outputs": [],
   "source": [
    "from joblib import load\n",
    "\n",
    "# NOTE: joblib.load unpickles the file and can execute arbitrary code -- only load files you trust\n",
    "# Read the model back from 'xgboost.joblib', the file written by the dump() call above\n",
    "model2 = load('xgboost.joblib')"
   ]
  },
{
"cell_type": "code",
"execution_count": 199,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'objective': 'binary:logistic',\n",
" 'base_score': None,\n",
" 'booster': None,\n",
" 'callbacks': None,\n",
" 'colsample_bylevel': None,\n",
" 'colsample_bynode': None,\n",
" 'colsample_bytree': None,\n",
" 'device': None,\n",
" 'early_stopping_rounds': None,\n",
" 'enable_categorical': True,\n",
" 'eval_metric': None,\n",
" 'feature_types': None,\n",
" 'gamma': None,\n",
" 'grow_policy': None,\n",
" 'importance_type': None,\n",
" 'interaction_constraints': None,\n",
" 'learning_rate': 0.001,\n",
" 'max_bin': None,\n",
" 'max_cat_threshold': None,\n",
" 'max_cat_to_onehot': None,\n",
" 'max_delta_step': None,\n",
" 'max_depth': 3,\n",
" 'max_leaves': None,\n",
" 'min_child_weight': None,\n",
" 'missing': nan,\n",
" 'monotone_constraints': None,\n",
" 'multi_strategy': None,\n",
" 'n_estimators': 100,\n",
" 'n_jobs': None,\n",
" 'num_parallel_tree': None,\n",
" 'random_state': None,\n",
" 'reg_alpha': None,\n",
" 'reg_lambda': None,\n",
" 'sampling_method': None,\n",
" 'scale_pos_weight': 7.975717439293598,\n",
" 'subsample': None,\n",
" 'tree_method': 'hist',\n",
" 'validate_parameters': None,\n",
" 'verbosity': None}"
]
},
"execution_count": 199,
"metadata": {},
"output_type": "execute_result"
}
   ],
   "source": [
    "# Show the hyperparameters stored on the reloaded estimator\n",
    "model2.get_params()"
   ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}