ium_s487178/.ipynb_checkpoints/body_performance-checkpoint.ipynb

247 lines
6.0 KiB
Plaintext
Raw Normal View History

2023-04-05 14:40:41 +02:00
{
"cells": [
{
"cell_type": "code",
2023-04-05 19:36:18 +02:00
"execution_count": null,
2023-04-05 14:40:41 +02:00
"id": "74524ede",
"metadata": {},
2023-04-05 19:36:18 +02:00
"outputs": [],
2023-04-05 14:40:41 +02:00
"source": [
"import pandas as pd\n",
"import plotly.express as px\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"# Load the dataset from the notebook's working directory. A plain relative file name\n",
"# resolves on every OS, whereas a Windows-style '.\\\\' prefix is a literal part of the name on POSIX.\n",
"df = pd.read_csv('body_performance.csv')\n",
"\n",
"# BMI = weight / height^2; the 0.0001 factor turns height_cm squared into square metres\n",
"# (assuming the column is in centimetres, as its name suggests).\n",
"df['BMI'] = df['weight_kg']/(0.0001*df['height_cm']*df['height_cm'])\n",
"print(df.head())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0177f243",
"metadata": {},
"outputs": [],
"source": [
"# Print the duplicate count explicitly: a bare expression is only displayed when it is\n",
"# the last statement of a cell.\n",
"print(f'duplicated rows:{df.duplicated().sum()}')\n",
"print(f'with duplicates:{df.shape}')\n",
"# Rebind df rather than mutating it in place.\n",
"df = df.drop_duplicates()\n",
"print(f'without duplicates:{df.shape}')\n",
"# Independent copy of the de-duplicated frame.\n",
"df_copy = df.copy()"
]
},
{
"cell_type": "code",
2023-04-05 19:36:18 +02:00
"execution_count": null,
"id": "8abefe6e",
2023-04-05 14:40:41 +02:00
"metadata": {},
2023-04-05 19:36:18 +02:00
"outputs": [],
2023-04-05 14:40:41 +02:00
"source": [
"# Split off 20% of the rows (row count taken from the non-null 'age' entries) as a hold-out set.\n",
"holdout_rows = int(df[\"age\"].count()*0.2)\n",
"body_train, body_holdout = train_test_split(df, test_size=holdout_rows, random_state=1)\n",
"\n",
"# Divide the hold-out set in half: one part is the test data, the other the validation data.\n",
"valid_rows = int(body_holdout[\"age\"].count()*0.5)\n",
"body_test, body_valid = train_test_split(body_holdout, test_size=valid_rows, random_state=1)\n",
"\n",
"# Report the size of the full frame and of every split.\n",
"for template, frame in (\n",
"    (\"number of elements in data frame: {}\", df),\n",
"    (\"train: {}\", body_train),\n",
"    (\"test: {}\", body_test),\n",
"    (\"valid: {}\", body_valid),\n",
"):\n",
"    print(template.format(frame[\"age\"].count()))"
]
},
{
"cell_type": "code",
2023-04-05 19:36:18 +02:00
"execution_count": null,
2023-04-05 14:40:41 +02:00
"id": "0f3ad57a",
"metadata": {},
2023-04-05 19:36:18 +02:00
"outputs": [],
2023-04-05 14:40:41 +02:00
"source": [
"print(df.describe(include='all'))\n",
"# sit and bend forward_cm goes below zero (has negative values)!"
]
},
{
"cell_type": "code",
2023-04-05 19:36:18 +02:00
"execution_count": null,
"id": "b694be50",
2023-04-05 14:40:41 +02:00
"metadata": {},
2023-04-05 19:36:18 +02:00
"outputs": [],
2023-04-05 14:40:41 +02:00
"source": [
"# Columns rescaled with a default MinMaxScaler (feature_range not overridden).\n",
"unit_range_cols = ['age', 'height_cm', 'weight_kg','body fat_%',\n",
"                   'diastolic','systolic','gripForce','sit-ups counts',\n",
"                   'broad jump_cm','BMI']\n",
"scaler = MinMaxScaler()\n",
"df[unit_range_cols] = scaler.fit_transform(df[unit_range_cols])\n",
"\n",
"# This column gets its own scaler, mapped onto the range (-1, 1).\n",
"# NOTE(review): body_train / body_test / body_valid were created before this cell and are\n",
"# not rescaled here; confirm that is intended.\n",
"scaler = MinMaxScaler(feature_range=(-1, 1))\n",
"df['sit and bend forward_cm'] = scaler.fit_transform(df[['sit and bend forward_cm']])\n",
"\n",
"# Last expression of the cell: summary statistics of the rescaled frame.\n",
"df.describe(include='all')\n",
"\n"
]
},
{
"cell_type": "code",
2023-04-05 19:36:18 +02:00
"execution_count": null,
2023-04-05 14:40:41 +02:00
"id": "5cd376cf",
"metadata": {},
2023-04-05 19:36:18 +02:00
"outputs": [],
2023-04-05 14:40:41 +02:00
"source": [
"df.info()"
]
},
{
"cell_type": "code",
2023-04-05 19:36:18 +02:00
"execution_count": null,
"id": "2375b677",
2023-04-05 14:40:41 +02:00
"metadata": {},
2023-04-05 19:36:18 +02:00
"outputs": [],
2023-04-05 14:40:41 +02:00
"source": [
"# Print the per-class row counts of the full frame and of each split.\n",
"for template, frame in (\n",
"    ('Each class in data frame: \\n{}', df),\n",
"    ('Each class in train data: \\n{}', body_train),\n",
"    ('Each class in test data: \\n{}', body_test),\n",
"    ('Each class in valid data: \\n{}', body_valid),\n",
"):\n",
"    print(template.format(frame['class'].value_counts()))"
]
},
{
"cell_type": "code",
"execution_count": null,
2023-04-05 19:36:18 +02:00
"id": "781b7e0b",
2023-04-05 14:40:41 +02:00
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
2023-04-05 19:36:18 +02:00
"id": "225a3cd0",
2023-04-05 14:40:41 +02:00
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4857a167",
"metadata": {},
"outputs": [],
"source": [
"#df[\"class\"].value_counts().plot(kind=\"bar\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "779157c0",
"metadata": {},
"outputs": [],
"source": [
"#df[[\"class\",\"body fat_%\"]].groupby(\"class\").mean().plot(kind=\"bar\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da14bf43",
"metadata": {},
"outputs": [],
"source": [
"#sns.set_theme()\n",
"\n",
"#sns.relplot(data = df.head(200), x = 'broad jump_cm', y = 'sit-ups counts', hue = 'class')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6597e57c",
"metadata": {},
"outputs": [],
"source": [
"#sns.relplot(data = df[df['gender'] == 'M'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "957e1b2e",
"metadata": {},
"outputs": [],
"source": [
"#sns.relplot(data = df[df['gender'] == 'F'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f0394f0",
"metadata": {},
"outputs": [],
"source": [
"#px.box(df, y=['height_cm',\n",
"# 'weight_kg',\n",
"# 'body fat_%',\n",
"# 'diastolic',\n",
"# 'systolic',\n",
"# 'gripForce',\n",
"# 'sit and bend forward_cm',\n",
"# 'sit-ups counts',\n",
"# 'broad jump_cm',\n",
"# 'BMI'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22542bba",
"metadata": {},
"outputs": [],
"source": [
"# this takes too long to run\n",
"#sns.pairplot(data=df.drop(columns=[\"gender\"]).head(500), hue=\"class\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29730d20",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc21a9cb",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}