352 lines
89 KiB
Plaintext
352 lines
89 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import requests\n",
|
||
|
"import io\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import json\n",
|
||
|
"import urllib.request\n",
|
||
|
"import lxml\n",
|
||
|
"from bs4 import BeautifulSoup\n",
|
||
|
"url = 'file:///home/kirugulige/table.html'\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"df = pd.read_html(url,encoding = 'utf-8')[0] # encoding = utf-8 for words like Piqué [é]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"players = df[['Name','Age','Unnamed: 6']]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"players = players.rename(columns={'Unnamed: 6' : 'Minutes Played'})"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"max_minute = players['Minutes Played'].max()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"percentile = []\n",
|
||
|
"for i in range(len(players)):\n",
|
||
|
" percentile.append((players.iloc[i]['Minutes Played']/max_minute)*100)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"players['Percentile'] = percentile"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Name</th>\n",
|
||
|
" <th>Age</th>\n",
|
||
|
" <th>Minutes Played</th>\n",
|
||
|
" <th>Percentile</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>M. ter Stegen</td>\n",
|
||
|
" <td>27</td>\n",
|
||
|
" <td>3150</td>\n",
|
||
|
" <td>100.000000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>J. Cillessen</td>\n",
|
||
|
" <td>30</td>\n",
|
||
|
" <td>270</td>\n",
|
||
|
" <td>8.571429</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>Iñaki Peña</td>\n",
|
||
|
" <td>20</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>Jokin Ezkieta</td>\n",
|
||
|
" <td>22</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>Nélson Semedo</td>\n",
|
||
|
" <td>25</td>\n",
|
||
|
" <td>1601</td>\n",
|
||
|
" <td>50.825397</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" Name Age Minutes Played Percentile\n",
|
||
|
"0 M. ter Stegen 27 3150 100.000000\n",
|
||
|
"1 J. Cillessen 30 270 8.571429\n",
|
||
|
"2 Iñaki Peña 20 0 0.000000\n",
|
||
|
"3 Jokin Ezkieta 22 0 0.000000\n",
|
||
|
"4 Nélson Semedo 25 1601 50.825397"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"players.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# variable to know if player just joined\n",
|
||
|
"year_at_club = [1,1,0,0,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"#len(year_at_club)\n",
|
||
|
"players['years'] = year_at_club"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 11,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Name</th>\n",
|
||
|
" <th>Age</th>\n",
|
||
|
" <th>Minutes Played</th>\n",
|
||
|
" <th>Percentile</th>\n",
|
||
|
" <th>years</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>M. ter Stegen</td>\n",
|
||
|
" <td>27</td>\n",
|
||
|
" <td>3150</td>\n",
|
||
|
" <td>100.000000</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>J. Cillessen</td>\n",
|
||
|
" <td>30</td>\n",
|
||
|
" <td>270</td>\n",
|
||
|
" <td>8.571429</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>Iñaki Peña</td>\n",
|
||
|
" <td>20</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>Jokin Ezkieta</td>\n",
|
||
|
" <td>22</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>Nélson Semedo</td>\n",
|
||
|
" <td>25</td>\n",
|
||
|
" <td>1601</td>\n",
|
||
|
" <td>50.825397</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" Name Age Minutes Played Percentile years\n",
|
||
|
"0 M. ter Stegen 27 3150 100.000000 1\n",
|
||
|
"1 J. Cillessen 30 270 8.571429 1\n",
|
||
|
"2 Iñaki Peña 20 0 0.000000 0\n",
|
||
|
"3 Jokin Ezkieta 22 0 0.000000 0\n",
|
||
|
"4 Nélson Semedo 25 1601 50.825397 1"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 11,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"players.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 13,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAALeCAYAAACgIyAQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xd8jef/x/HXySYSYibEjr1HqU1i1Gi19ixK6/u1W74UrSjV2qW0tWeLIrFHxagZQggxIlONxEwQIomM3x9+OZUKEkI03s/HI4+ec1/rc9/nPNJ8XNd93QZ7e/tEREREREREJNVMMjoAERERERGRfxslUiIiIiIiImmkREpERERERCSNlEiJiIiIiIikkRIpERERERGRNFIiJSIiIiIikkZKpDLI0KFDCQsLY8aMGRkah5ubW4bH8CI6dOhAWFgYHh4eGR3Ka2VqasqMGTPw8/MjLCyMMWPGPPFdSro2bm5uGRytiIiISOaV6RMpLy8vwsLCCAsL4/Lly5w8eZJFixZRqFChjA7tXyFLliyMGzeOY8eOceHCBU6fPs2GDRto0qRJRoeWao9/B0qUKPHKx3NzczOOFxYWhp+fHxs2bKB+/fov3XeLFi3o2LEj8fHxLFiwgMOHD3P8+HHmz5/P3r170yF6EREREUkNs4wO4HXZsWMHFy9epGHDhjRv3hwbGxvat2//Qn0ZDAYAEhMz/7OMR40aRZ8+ffD398fDw4OcOXNSrVo1ypUr96+YDXr33XcpWLCg8X27du34/vvvX8vYnp6enDlzhooVK1KjRg2WLVtG06ZN8ff3f6KumZkZcXFxz+2zePHiAOzatYuvv/7aeHzPnj3pF7iIiIiIPFemn5FKsnLlSr7++mvGjRsHgJOTk7HM1dUVLy8vQkJCCA4OZvPmzdSqVctYnjTDMHr0aLZs2cLFixcpUKAAWbJkYdiwYezfv5/g4GC8vb3p2rUr8GgJVr9+/di3bx9BQUHs3buXbt26PTPG5s2bs23bNgICAjh69Cjfffcdtra2ADg6OhpnODp16sSxY8c4d+4c33zzjbF9/fr12bFjB+fPn+fixYscPXqUYcOGvdR1q127NgD9+/dn5MiR9O3bl+rVq7N48WJjnXfffZc9e/YQFBTErFmz+OWXXwgLCzNe65SWMSadi6OjI/D8zyBfvnysWrWKoKAg1q9fn+oZxbZt2wLg6+sLwEcffZSs/PF+N2zYwLBhw55YMliqVCmWL1+Or68vp0+fZsGCBRQoUOC5Y2/bto2vv/6aNm3acPv2bSwtLY2zUknn/+mnn3LkyBEOHDgAPPqc582bh4+PD+fOnWPt2rVUqVLFeB1HjBgBQPv27QkLC6NDhw6pWiZao0YN3NzcOHfuHCdOnGD69OnY2dml6hqKiIiIyJPemkSqc+fOjB8/3viv+Fu2bDGWFSpUiOPHj7Ny5UoOHjxItWrVmD9/PtbW1sn66NevHzdv3mT9+vXExsYydepUhg4dSq5cuVi/fj2+vr4UK1YMgBEjRvD1119jMBhYt24dlpaWTJky5amzYM7OzixatIgyZcqwdetW7t27R69evZgzZ84TdYcNG8bhw4exsbHhs88+o27dugDY29sTHh7Ohg0bWLt2LdbW1gwdOpTWrVu/8HW7fv06AEuXLmXq1Kl07NiRPHnycOfOHQCyZ8/OkiVLKF26NN7e3tjZ2fH++++neZznfQY///wzDRo04MqVK1y8eJH+/fs/t08LCwtatmwJwNixY4mIiKBgwYLJErSkfkNDQ7l48SIDBgxI1keePHlYt24d9evXx8vLi0OHDtGyZUtWrlyJhYVFqs6tcuXKxvMIDw9PVjZy5EgOHz7M3r17yZIlC2vXruX9998nODiYgwcPUqdOHdauXUvhwoU5fvw43t7eAPj7+zN//vwUZ7f+qVSpUqxevZqKFSuyZ88ezp07R+fOnZk3b16q4h
cRERGRJ701iVTTpk3p06cPJUqUIDo6mlOnThnLhg4dyoEDB4iMjCQkJISoqChy5cpFmTJlkvXh5uZGjx49GDhwIHFxcbRp0wZ4dHP/F198Qc+ePY3Lxnr16gXA0aNHiYqKws/PD4AePXqkGN8nn3wCwI8//sjgwYNp27YtDx8+pFGjRsbkLEnv3r0ZMGAAXl5eAJQvXx6ANWvWMG/ePEJDQ4mMjOSvv/4CMCZaL2Ls2LEEBQWRP39+unbtyowZMzh69ChdunQBoEmTJmTPnp3g4GA6dOhAt27dOHPmTJrHedZn4ODgYJwZ69SpE4MGDWLJkiXP7bNx48bY2dlx48YNPD092blzJ/D3LNXj/Xbu3JmBAweyfPnyZH20a9cOOzs7Lly4wJUrV7h69So3b96kRIkS1KlT55njjxs3jrCwMDZv3oy5uTk+Pj5s3bo1WZ1Ro0YxePBgRowYQePGjSlcuDAXLlygbdu29OnTh23btpE1a1a6dOnCnj17jEv4Tpw4wZgxY/Dx8XnudejRoweWlpacP3+emzdvEhgYSHR0NHXr1k02MysiIiIiqffW3CPVq1cvtm/fTpUqVVi/fj3Tp0/H09OTe/fusWvXLhwcHJ5okytXrmTvjx49anyddN9NdHQ0p0+fNh6Pi4sjV65cZMuWDXj0B/rjihYtmmJ8Sf0FBAQAj2YuwsPDyZcvH46OjgQHBxvrJo2XNCuUNNsxadIkunfv/tzzSIvz589Tt25dKleuzLvvvku7du0oV64crq6urFixgnz58gEQEhJibBMUFETFihWf2qeJSfL83c7O7pmfQXx8PAAPHjwgNDQUINn1eJqkhGnHjh0kJiaybds22rdvT6tWrRg1ahT29vbGfi9fvgzwxAxP0udSsmRJSpYsmaysSJEizxzf09MTX19f7t69i5+fH9u3bzeeS5KUvlNBQUHG++8CAwMBjEsgX0RSv9WqVaNatWpPnEPSGCIiIiKSem/NjFSSU6dOERUVhampKUWKFKFmzZo4ODhw7do1KlSoQKFChbh9+zbw96YSSWJiYoyvL126BICVlRXlypUzHjc1NeXWrVvcv38feLRkz8HBwfjTrFmzFONK6i9phsDOzo6cOXMCGP/IT/LPP8aTfPDBBwAMGDCA/PnzG2dt/nkeaVGjRg3MzMzw8fFhzpw5DB48GHh03qamply9ehVIniAmbYiQJCoqCgAbGxsASpcunaz8eZ9B0hhZsmQhf/78AE/M0v2Tra0tzs7OAHTt2pWwsDAWLVoEPFqO2KRJk2T9JiVx/5yhSfpctm7dmuxzrFixIitXrnxmDNu2bcPV1ZVp06axZcuWFD+3lL5Tj59b0rX853cgLZL6nTt3brJzqFmzpnGWTkRERETS5q2ZkercuTN16tShQoUK5MiRg6ioKM6ePWucVciVKxfffPMNhQsXfuLeqJSEh4fj7u5OmzZtWL16Ndu3bydHjhyEhITw7bffsnjxYgYMGMCqVavw8PAga9asVKtWDU9PT4YMGfJEf4sXL8bFxYVBgwZRuHBhKlasiLm5OXv37iU4ODhVMxI3btwge/bs9O7dG2dnZ5o3b57m6/RP48aNw97eHm9vb65fv25cCufp6Ul8fDw7d+7kzp07FCtWjNWrVxMbG5sssYS/Z9CcnZ1xdXXFxcXlibjh6Z9BWFgYnp6e1KpVi1WrVuHj42NMGp/mgw8+wMrKirt373Lo0CHj8RIlSlC8eHHatWvHli1bOHToELVr12bVqlWcPHnyiX7d3d0ZNGgQLVq0YOXKlVy6dInChQtTq1Ytateu/VIJzj/t3LmTS5cuUbRoUdzc3AgPD6dFixY8ePDguUnbs/z666907dqV3r17U6hQIcLDwylRogTVq1dP1aYZIiIiIvKkt2ZGKukeqVKlSnHkyBE+/vhjbt26hbe3NzNmzCAyMpIGDRqwfv1640zF8wwbNozp06cTHh5OmzZtqFy5snGJ26RJkxg/fjy3b9
+mTZs21K1bl6CgIDZu3JhiX7t27eLTTz/F39+fVq1aYWtry7Jly+jbt2+qz3HYsGEEBARQunRprK2tn7jf50WsWLG
|
||
|
"text/plain": [
|
||
|
"<Figure size 1008x864 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import seaborn as sns; sns.set()\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from matplotlib.patches import Arc, Rectangle, ConnectionPatch\n",
|
||
|
"\n",
|
||
|
"a4_dims = (14 ,12)\n",
|
||
|
"bg = \"#181818\"\n",
|
||
|
"flatui = ['dodgerblue','firebrick']\n",
|
||
|
"\n",
|
||
|
"#player = sns.load_dataset(\"players\")\n",
|
||
|
"sns.set(rc={ 'grid.color': '#5c5b5b','grid.linestyle': '-','axes.edgecolor': '#000000','axes.facecolor':bg, 'figure.facecolor':bg,'ytick.color':'white','xtick.color':'white' ,'axes.labelcolor': 'white',})\n",
|
||
|
"\n",
|
||
|
"#initialize\n",
|
||
|
"fig, ax = plt.subplots(figsize=a4_dims)\n",
|
||
|
"\n",
|
||
|
"#draw the green rectangle and add to plot\n",
|
||
|
"rect = Rectangle([24,-2], width = 4, height = 104, fill = True,color=\"seagreen\",zorder=5,alpha=0.5)\n",
|
||
|
"ax.add_patch(rect)\n",
|
||
|
"\n",
|
||
|
"#add scatter points\n",
|
||
|
"ax = sns.scatterplot(x=\"Age\", y=\"Percentile\",palette = flatui, data=players,s=150,legend = False,alpha=0.8,zorder =8,hue = 'years')\n",
|
||
|
"plt.ylim(-2, 101)\n",
|
||
|
"plt.xlim(16, 40)\n",
|
||
|
"plt.title('Barcelona | Squad Age Profile \\n2018/2019',color = 'white',loc = 'left',fontweight = 'semibold')\n",
|
||
|
"\n",
|
||
|
"#add annotations\n",
|
||
|
"for line in range(len(players)):\n",
|
||
|
" ax.text(players.iloc[line]['Age']+0.2, players.iloc[line]['Percentile'], players.iloc[line]['Name'], horizontalalignment='left', size='medium', color='white',zorder = 8)\n",
|
||
|
"\n",
|
||
|
"#firebrick"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.6.8"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|