2023-03-30 20:30:56 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "74524ede",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"import plotly.express as px\n",
|
|
|
|
"import seaborn as sns\n",
|
2023-04-05 14:40:41 +02:00
|
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
|
|
"from sklearn.preprocessing import MinMaxScaler\n",
|
2023-03-30 20:30:56 +02:00
|
|
|
"\n",
|
|
|
|
"# Load the data set via a plain relative path (resolved against the current\n",
"# working directory) so the notebook also runs on non-Windows systems.\n",
"df = pd.read_csv('body_performance.csv')\n",
"\n",
"# BMI = weight / height^2; going by the column names the inputs are kg and cm,\n",
"# so the 0.0001 factor converts cm^2 to m^2.\n",
"df['BMI'] = df['weight_kg'] / (0.0001 * df['height_cm'] * df['height_cm'])\n",
|
2023-04-05 14:40:41 +02:00
|
|
|
"df.head()"
|
2023-03-30 20:30:56 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "0177f243",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Report the number of fully duplicated rows (a bare expression in the middle\n",
"# of a cell is not displayed, so it has to be printed explicitly).\n",
"print(f'duplicated rows:{df.duplicated().sum()}')\n",
"print(f'with duplicates:{df.shape}')\n",
"df = df.drop_duplicates()\n",
"print(f'without duplicates:{df.shape}')\n",
"# Keep an independent copy of the de-duplicated frame.\n",
"df_copy = df.copy()"
|
|
|
|
]
|
|
|
|
},
|
2023-04-05 14:40:41 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
2023-04-05 19:36:18 +02:00
|
|
|
"id": "8abefe6e",
|
2023-04-05 14:40:41 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Hold out 20% of the rows, then split the hold-out in half into test and validation sets.\n",
"# Sizes are derived from the number of rows (len), not from the non-null count of one column.\n",
"holdout_size = int(len(df) * 0.2)\n",
"body_train, body_holdout = train_test_split(df, test_size=holdout_size, random_state=1)\n",
"body_test, body_valid = train_test_split(body_holdout, test_size=int(len(body_holdout) * 0.5), random_state=1)\n",
"\n",
"print(\"number of elements in data frame: {}\".format(len(df)))\n",
"print(\"train: {}\".format(len(body_train)))\n",
"print(\"test: {}\".format(len(body_test)))\n",
"print(\"valid: {}\".format(len(body_valid)))"
|
|
|
|
]
|
|
|
|
},
|
2023-03-30 20:30:56 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "0f3ad57a",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2023-04-05 14:40:41 +02:00
|
|
|
"# NOTE: 'sit and bend forward_cm' goes below zero!\n",
"df.describe(include='all')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
2023-04-05 19:36:18 +02:00
|
|
|
"id": "b694be50",
|
2023-04-05 14:40:41 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Fit the scalers on the training split only and apply them to every split,\n",
"# so the test/validation rows do not influence the scaling parameters.\n",
"# (The splits are taken before this cell, so scaling `df` itself would leave them unscaled.)\n",
"unit_range_cols = ['age', 'height_cm', 'weight_kg', 'body fat_%',\n",
"                   'diastolic', 'systolic', 'gripForce', 'sit-ups counts',\n",
"                   'broad jump_cm', 'BMI']\n",
"# This column was noted earlier as going below zero, so it is mapped to [-1, 1] instead of [0, 1].\n",
"signed_cols = ['sit and bend forward_cm']\n",
"\n",
"# Separate names keep both fitted scalers available (e.g. for inverse_transform).\n",
"unit_scaler = MinMaxScaler().fit(body_train[unit_range_cols])\n",
"signed_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(body_train[signed_cols])\n",
"\n",
"# Work on explicit copies so the column assignments below never touch `df`.\n",
"body_train, body_test, body_valid = (part.copy() for part in (body_train, body_test, body_valid))\n",
"for part in (body_train, body_test, body_valid):\n",
"    part[unit_range_cols] = unit_scaler.transform(part[unit_range_cols])\n",
"    part[signed_cols] = signed_scaler.transform(part[signed_cols])\n",
"\n",
"body_train.describe(include='all')"
|
2023-03-30 20:30:56 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "5cd376cf",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"df.info()"
|
|
|
|
]
|
|
|
|
},
|
2023-04-05 14:40:41 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
2023-04-05 19:36:18 +02:00
|
|
|
"id": "2375b677",
|
2023-04-05 14:40:41 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Print the class distribution of the full frame and of each split.\n",
"for label, frame in (('data frame', df), ('train data', body_train),\n",
"                     ('test data', body_test), ('valid data', body_valid)):\n",
"    print('Each class in {}: \\n{}'.format(label, frame['class'].value_counts()))"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
2023-04-05 19:36:18 +02:00
|
|
|
"id": "781b7e0b",
|
2023-04-05 14:40:41 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
2023-04-05 19:36:18 +02:00
|
|
|
"id": "225a3cd0",
|
2023-04-05 14:40:41 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"\n",
|
|
|
|
"\n"
|
|
|
|
]
|
|
|
|
},
|
2023-03-30 20:30:56 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "4857a167",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2023-04-05 14:40:41 +02:00
|
|
|
"#df[\"class\"].value_counts().plot(kind=\"bar\")"
|
2023-03-30 20:30:56 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "779157c0",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2023-04-05 14:40:41 +02:00
|
|
|
"#df[[\"class\",\"body fat_%\"]].groupby(\"class\").mean().plot(kind=\"bar\")"
|
2023-03-30 20:30:56 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "da14bf43",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2023-04-05 14:40:41 +02:00
|
|
|
"#sns.set_theme()\n",
|
2023-03-30 20:30:56 +02:00
|
|
|
"\n",
|
2023-04-05 14:40:41 +02:00
|
|
|
"#sns.relplot(data = df.head(200), x = 'broad jump_cm', y = 'sit-ups counts', hue = 'class')"
|
2023-03-30 20:30:56 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "6597e57c",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2023-04-05 14:40:41 +02:00
|
|
|
"#sns.relplot(data = df[df['gender'] == 'M'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class')"
|
2023-03-30 20:30:56 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "957e1b2e",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2023-04-05 14:40:41 +02:00
|
|
|
"#sns.relplot(data = df[df['gender'] == 'F'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class')"
|
2023-03-30 20:30:56 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "9f0394f0",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2023-04-05 14:40:41 +02:00
|
|
|
"#px.box(df, y=['height_cm',\n",
|
|
|
|
"# 'weight_kg',\n",
|
|
|
|
"# 'body fat_%',\n",
|
|
|
|
"# 'diastolic',\n",
|
|
|
|
"# 'systolic',\n",
|
|
|
|
"# 'gripForce',\n",
|
|
|
|
"# 'sit and bend forward_cm',\n",
|
|
|
|
"# 'sit-ups counts',\n",
|
|
|
|
"# 'broad jump_cm',\n",
|
|
|
|
"# 'BMI'])"
|
2023-03-30 20:30:56 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "22542bba",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# this takes too long to run\n",
|
|
|
|
"#sns.pairplot(data=df.drop(columns=[\"gender\"]).head(500), hue=\"class\")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "29730d20",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "dc21a9cb",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.10.9"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 5
|
|
|
|
}
|