{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Importy" ] },
 { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Wczytanie danych" ] },
 { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('data4.csv')" ] },
 { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [
  "# The target is kept as a one-column DataFrame; every remaining column goes to X.\n",
  "y = df[['isGoal']]\n",
  "X = df.drop(columns=['isGoal'])" ] },
 { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\anaconda3\\lib\\site-packages\\scipy\\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" ] } ], "source": [
  "from sklearn.model_selection import train_test_split\n",
  "\n",
  "# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.\n",
  "X_train, X_test, y_train, y_test = train_test_split(\n",
  "    X, y, test_size=0.2, random_state=1\n",
  ")" ] },
 { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_minutematch_secondposition_xposition_yplay_typeBodyPartNumber_Intervening_OpponentsNumber_Intervening_TeammatesInterference_on_Shooteroutcome...Interference_on_Shooter_Codedistance_to_goalMdistance_to_centerMangleisFootisHeadheader_distance_to_goalMHighLowMedium
876713289.23-2.24Open PlayHead30MediumGoal...29.4991682.24528313.672174019.499168001
579878914.4612.72Open PlayLeft30LowSaved...119.27833212.75000041.404002100.000000010
601878279.7314.22Open PlayLeft20LowMissed...117.25793314.25353855.681087100.000000010
4961343434.910.25Open PlayRight41LowSaved...134.9108990.2505900.411271100.000000010
447525726.931.00Open PlayLeft20MediumSaved...226.9486481.0023582.131616100.000000001
\n", "

5 rows × 29 columns

\n", "
" ], "text/plain": [ " match_minute match_second position_x position_y play_type BodyPart \\\n", "8767 13 28 9.23 -2.24 Open Play Head \n", "5798 78 9 14.46 12.72 Open Play Left \n", "6018 78 27 9.73 14.22 Open Play Left \n", "4961 34 34 34.91 0.25 Open Play Right \n", "447 52 57 26.93 1.00 Open Play Left \n", "\n", " Number_Intervening_Opponents Number_Intervening_Teammates \\\n", "8767 3 0 \n", "5798 3 0 \n", "6018 2 0 \n", "4961 4 1 \n", "447 2 0 \n", "\n", " Interference_on_Shooter outcome ... Interference_on_Shooter_Code \\\n", "8767 Medium Goal ... 2 \n", "5798 Low Saved ... 1 \n", "6018 Low Missed ... 1 \n", "4961 Low Saved ... 1 \n", "447 Medium Saved ... 2 \n", "\n", " distance_to_goalM distance_to_centerM angle isFoot isHead \\\n", "8767 9.499168 2.245283 13.672174 0 1 \n", "5798 19.278332 12.750000 41.404002 1 0 \n", "6018 17.257933 14.253538 55.681087 1 0 \n", "4961 34.910899 0.250590 0.411271 1 0 \n", "447 26.948648 1.002358 2.131616 1 0 \n", "\n", " header_distance_to_goalM High Low Medium \n", "8767 9.499168 0 0 1 \n", "5798 0.000000 0 1 0 \n", "6018 0.000000 0 1 0 \n", "4961 0.000000 0 1 0 \n", "447 0.000000 0 0 1 \n", "\n", "[5 rows x 29 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.head()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isGoal
87671
57980
60180
49610
4470
\n", "
" ], "text/plain": [ " isGoal\n", "8767 1\n", "5798 0\n", "6018 0\n", "4961 0\n", "447 0" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Przygotowanie danych" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['match_minute', 'match_second', 'position_x', 'position_y', 'play_type',\n", " 'BodyPart', 'Number_Intervening_Opponents',\n", " 'Number_Intervening_Teammates', 'Interference_on_Shooter', 'outcome',\n", " 'position_xM', 'position_yM', 'position_xM_r', 'position_yM_r',\n", " 'position_xM_std', 'position_yM_std', 'position_xM_std_r',\n", " 'position_yM_std_r', 'BodyPartCode', 'Interference_on_Shooter_Code',\n", " 'distance_to_goalM', 'distance_to_centerM', 'angle', 'isFoot', 'isHead',\n", " 'header_distance_to_goalM', 'High', 'Low', 'Medium'],\n", " dtype='object')" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Uwzględnienie wybranych cech: \n", "- Współrzędna x strzelającego,\n", "- Współrzędna y strzelającego,\n", "- Dystans do bramki,\n", "- Kąt do bramki,\n", "- Minuta meczu,\n", "- Liczba przeciwników przed piłką,\n", "- Liczba zawodników ze swojej drużyny przed piłką,\n", "- Część ciała." 
] },
 { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [
  "# Single list of model inputs shared by the train and test matrices,\n",
  "# so the two selections cannot drift apart.\n",
  "FEATURE_COLUMNS = [\n",
  "    'position_x', 'position_y', 'distance_to_goalM',\n",
  "    'angle', 'match_minute', 'Number_Intervening_Opponents',\n",
  "    'Number_Intervening_Teammates', 'isFoot', 'isHead',\n",
  "]\n",
  "\n",
  "X_train_extracted = X_train[FEATURE_COLUMNS]" ] },
 { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [
  "X_test_extracted = X_test[FEATURE_COLUMNS]" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Trening modelu" ] },
 { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression" ] },
 { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LogisticRegression(max_iter=500)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "model = LogisticRegression(max_iter=500)\n",
  "# y_train is a one-column DataFrame; scikit-learn expects a 1-D target of shape\n",
  "# (n_samples,), so flatten it instead of triggering a DataConversionWarning.\n",
  "model.fit(X_train_extracted, y_train.values.ravel())" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Ewaluacja modelu" ] },
 { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Zbiór danych testowych zawiera 2033 oddane strzały, gdzie 236 to strzały trafione.\n", "Dokładność klasyfikacji, czy strzał jest bramką, czy nie, wynosi 0.89.\n", "klasyfikator uzyskał ROC-AUC na poziomie 0.76.\n" ] } ], "source": [
  "from sklearn.metrics import roc_auc_score\n",
  "\n",
  "# Both metrics are fractions in [0, 1], so they are printed without a percent sign.\n",
  "accuracy = model.score(X_test_extracted, y_test)\n",
  "roc_auc = roc_auc_score(y_test, model.predict_proba(X_test_extracted)[:, 1])\n",
  "\n",
  "print(f'Zbiór danych testowych zawiera {len(y_test)} oddane strzały, gdzie {y_test.sum()[\"isGoal\"]} to strzały trafione.')\n",
  "print(f'Dokładność klasyfikacji, czy strzał jest bramką, czy nie, wynosi {accuracy:.2f}.')\n",
  "print(f'klasyfikator uzyskał ROC-AUC na poziomie {roc_auc:.2f}.')" ] },
 { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.89 0.99 0.94 1797\n", " 1 0.59 0.09 0.16 236\n", "\n", " accuracy 0.89 2033\n", " macro avg 0.74 0.54 0.55 2033\n", "weighted avg 0.86 0.89 0.85 2033\n", "\n" ] } ], "source": [
  "from sklearn.metrics import classification_report\n",
  "\n",
  "print(classification_report(y_test, model.predict(X_test_extracted)))" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Zapisywanie modelu" ] },
 { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['regresja_logistyczna.joblib']" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "from joblib import dump\n",
  "\n",
  "# One path constant shared by the save and load cells.\n",
  "MODEL_PATH = 'regresja_logistyczna.joblib'\n",
  "dump(model, MODEL_PATH)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Wczytywanie modelu" ] },
 { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [
  "from joblib import load\n",
  "\n",
  "# joblib files are pickles: only load artifacts that come from a trusted source.\n",
  "model2 = load(MODEL_PATH)\n",
  "\n",
  "# Round-trip check: the reloaded model must reproduce the in-memory predictions.\n",
  "assert (model2.predict(X_test_extracted) == model.predict(X_test_extracted)).all()" ] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } },
 "nbformat": 4,
 "nbformat_minor": 2 }