ium_s487178/.ipynb_checkpoints/body_performance-checkpoint.ipynb
2023-04-05 20:23:36 +02:00

275 lines
7.3 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "74524ede",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" age gender height_cm weight_kg body fat_% diastolic systolic \\\n",
"0 27.0 M 172.3 75.24 21.3 80.0 130.0 \n",
"1 25.0 M 165.0 55.80 15.7 77.0 126.0 \n",
"2 31.0 M 179.6 78.00 20.1 92.0 152.0 \n",
"3 32.0 M 174.5 71.10 18.4 76.0 147.0 \n",
"4 28.0 M 173.8 67.70 17.1 70.0 127.0 \n",
"\n",
" gripForce sit and bend forward_cm sit-ups counts broad jump_cm class \\\n",
"0 54.9 18.4 60.0 217.0 C \n",
"1 36.4 16.3 53.0 229.0 A \n",
"2 44.8 12.0 49.0 181.0 C \n",
"3 41.4 15.2 53.0 219.0 B \n",
"4 43.5 27.1 45.0 217.0 B \n",
"\n",
" BMI \n",
"0 25.344179 \n",
"1 20.495868 \n",
"2 24.181428 \n",
"3 23.349562 \n",
"4 22.412439 \n"
]
}
],
"source": [
"import pandas as pd\n",
"import plotly.express as px\n",
"import seaborn as sns\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"df = pd.read_csv(os.path.join('.', 'body_performance.csv'))\n",
"\n",
"df['BMI'] = df['weight_kg']/(0.0001*df['height_cm']*df['height_cm'])\n",
"print(df.head())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0177f243",
"metadata": {},
"outputs": [],
"source": [
"df.duplicated().sum()\n",
"print(f'with duplicates:{df.shape}')\n",
"df.drop_duplicates(inplace=True)\n",
"print(f'without duplicates:{df.shape}')\n",
"df_copy = df.copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8abefe6e",
"metadata": {},
"outputs": [],
"source": [
"body_train, body_test = train_test_split(df, test_size=int(df[\"age\"].count()*0.2), random_state=1)\n",
"body_test, body_valid = train_test_split(body_test, test_size=int(body_test[\"age\"].count()*0.5), random_state=1)\n",
"\n",
"print(\"number of elements in data frame: {}\".format(df['age'].count()))\n",
"print(\"train: {}\".format(body_train[\"age\"].count()))\n",
"print(\"test: {}\".format(body_test[\"age\"].count()))\n",
"print(\"valid: {}\".format(body_valid[\"age\"].count()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f3ad57a",
"metadata": {},
"outputs": [],
"source": [
"print(df.describe(include='all'))\n",
"#sit and bend forward_cm jest na minusie!!!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b694be50",
"metadata": {},
"outputs": [],
"source": [
"scaler = MinMaxScaler()\n",
"df[['age', 'height_cm', 'weight_kg','body fat_%',\n",
" 'diastolic','systolic','gripForce','sit-ups counts',\n",
" 'broad jump_cm','BMI']] = scaler.fit_transform(df[[\n",
" 'age', 'height_cm', 'weight_kg','body fat_%',\n",
" 'diastolic','systolic','gripForce','sit-ups counts',\n",
" 'broad jump_cm','BMI']])\n",
"\n",
"scaler = MinMaxScaler(feature_range=(-1, 1))\n",
"df['sit and bend forward_cm'] = scaler.fit_transform(df[['sit and bend forward_cm']])\n",
"df.describe(include='all')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5cd376cf",
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2375b677",
"metadata": {},
"outputs": [],
"source": [
"print('Each class in data frame: \\n{}'.format(df['class'].value_counts()))\n",
"print('Each class in train data: \\n{}'.format(body_train['class'].value_counts()))\n",
"print('Each class in test data: \\n{}'.format(body_test['class'].value_counts()))\n",
"print('Each class in valid data: \\n{}'.format(body_valid['class'].value_counts()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "781b7e0b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "225a3cd0",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4857a167",
"metadata": {},
"outputs": [],
"source": [
"#df[\"class\"].value_counts().plot(kind=\"bar\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "779157c0",
"metadata": {},
"outputs": [],
"source": [
"#df[[\"class\",\"body fat_%\"]].groupby(\"class\").mean().plot(kind=\"bar\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da14bf43",
"metadata": {},
"outputs": [],
"source": [
"#sns.set_theme()\n",
"\n",
"#sns.relplot(data = df.head(200), x = 'broad jump_cm', y = 'sit-ups counts', hue = 'class')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6597e57c",
"metadata": {},
"outputs": [],
"source": [
"#sns.relplot(data = df[df['gender'] == 'M'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "957e1b2e",
"metadata": {},
"outputs": [],
"source": [
"#sns.relplot(data = df[df['gender'] == 'F'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f0394f0",
"metadata": {},
"outputs": [],
"source": [
"#px.box(df, y=['height_cm',\n",
"# 'weight_kg',\n",
"# 'body fat_%',\n",
"# 'diastolic',\n",
"# 'systolic',\n",
"# 'gripForce',\n",
"# 'sit and bend forward_cm',\n",
"# 'sit-ups counts',\n",
"# 'broad jump_cm',\n",
"# 'BMI'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22542bba",
"metadata": {},
"outputs": [],
"source": [
"# this is taking too long time\n",
"#sns.pairplot(data=df.drop(columns=[\"gender\"]).head(500), hue=\"class\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29730d20",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc21a9cb",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}