ium_s487178/body_performance.ipynb

177 lines
3.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "74524ede",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import plotly.express as px\n",
"import seaborn as sns\n",
"\n",
"# Load the dataset with a plain relative file name; a backslash-separated\n",
"# path would only resolve on Windows.\n",
"df = pd.read_csv('body_performance.csv')\n",
"\n",
"# Body-mass index: weight (kg) divided by height (m) squared. Units are taken\n",
"# from the column names; dividing height_cm by 100 converts it to metres.\n",
"df['BMI'] = df['weight_kg'] / (df['height_cm'] / 100) ** 2\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0177f243",
"metadata": {},
"outputs": [],
"source": [
"# Print the duplicate count explicitly: only a cell's last expression is\n",
"# auto-displayed, so a bare expression at this position would show nothing.\n",
"print(f'duplicated rows:{df.duplicated().sum()}')\n",
"print(f'with duplicates:{df.shape}')\n",
"# Rebind to the de-duplicated frame instead of mutating it in place.\n",
"df = df.drop_duplicates()\n",
"print(f'without duplicates:{df.shape}')\n",
"# Keep a separate copy of the de-duplicated frame.\n",
"df_copy = df.copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f3ad57a",
"metadata": {},
"outputs": [],
"source": [
"# Summary statistics for every column, not only the numeric ones.\n",
"df.describe(include=\"all\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5cd376cf",
"metadata": {},
"outputs": [],
"source": [
"# Print the frame's column dtypes, non-null counts and memory usage.\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4857a167",
"metadata": {},
"outputs": [],
"source": [
"# Row count per class, with a title and axis labels so the chart stands alone.\n",
"# The trailing semicolon suppresses the Axes repr in the cell output.\n",
"df['class'].value_counts().plot(\n",
"    kind='bar',\n",
"    title='Rows per class',\n",
"    xlabel='class',\n",
"    ylabel='count',\n",
");"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "779157c0",
"metadata": {},
"outputs": [],
"source": [
"# Mean of the 'body fat_%' column per class. The y-axis label names the plotted\n",
"# quantity, and the trailing semicolon suppresses the Axes repr.\n",
"df.groupby('class')['body fat_%'].mean().plot(\n",
"    kind='bar',\n",
"    title='Mean body fat_% per class',\n",
"    xlabel='class',\n",
"    ylabel='mean body fat_%',\n",
");"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da14bf43",
"metadata": {},
"outputs": [],
"source": [
"# Switch to seaborn's default theme for the figures drawn from here on.\n",
"sns.set_theme()\n",
"\n",
"# Relational plot of the first 200 rows, coloured by class.\n",
"sns.relplot(\n",
"    data=df.iloc[:200],\n",
"    x='broad jump_cm',\n",
"    y='sit-ups counts',\n",
"    hue='class',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6597e57c",
"metadata": {},
"outputs": [],
"source": [
"# First 200 rows whose gender is 'M'. The title identifies the subset, and an\n",
"# explicit hue order (sorted class labels of the whole frame) keeps every class\n",
"# on the same colour no matter in which order the classes appear in the subset.\n",
"sns.relplot(\n",
"    data=df[df['gender'] == 'M'].head(200),\n",
"    x='body fat_%',\n",
"    y='BMI',\n",
"    hue='class',\n",
"    hue_order=sorted(df['class'].dropna().unique()),\n",
").set(title='body fat_% vs BMI (gender = M, first 200 rows)');"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "957e1b2e",
"metadata": {},
"outputs": [],
"source": [
"# First 200 rows whose gender is 'F'. The title identifies the subset, and an\n",
"# explicit hue order (sorted class labels of the whole frame) keeps every class\n",
"# on the same colour no matter in which order the classes appear in the subset.\n",
"sns.relplot(\n",
"    data=df[df['gender'] == 'F'].head(200),\n",
"    x='body fat_%',\n",
"    y='BMI',\n",
"    hue='class',\n",
"    hue_order=sorted(df['class'].dropna().unique()),\n",
").set(title='body fat_% vs BMI (gender = F, first 200 rows)');"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f0394f0",
"metadata": {},
"outputs": [],
"source": [
"# One box per listed column, all drawn in a single plotly figure.\n",
"px.box(\n",
"    df,\n",
"    y=[\n",
"        'height_cm', 'weight_kg', 'body fat_%',\n",
"        'diastolic', 'systolic', 'gripForce',\n",
"        'sit and bend forward_cm', 'sit-ups counts', 'broad jump_cm',\n",
"        'BMI',\n",
"    ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22542bba",
"metadata": {},
"outputs": [],
"source": [
"# Disabled: the pair plot below takes too long to run.\n",
"#sns.pairplot(data=df.drop(columns=[\"gender\"]).head(500), hue=\"class\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29730d20",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc21a9cb",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}