ium_s487178/body_performance.ipynb

3.7 KiB

import pandas as pd
import plotly.express as px
import seaborn as sns

# Load the body-performance dataset from the current working directory.
# A plain relative file name is used instead of the Windows-only '.\' prefix,
# so the same line also resolves on Linux and macOS.
df = pd.read_csv('body_performance.csv')

# Body-mass index: weight divided by the squared height in metres.
# Assumes 'weight_kg' is in kilograms and 'height_cm' in centimetres, as the
# column names indicate; dividing by 100 converts centimetres to metres.
df['BMI'] = df['weight_kg'] / (df['height_cm'] / 100) ** 2
# Preview of the first rows. As a bare expression it is only rendered when it
# is the last statement of a notebook cell.
df.head()

# Count fully duplicated rows and report the number explicitly, so it is
# visible regardless of where this statement sits in a cell or script.
duplicate_count = df.duplicated().sum()
print(f'duplicate rows:{duplicate_count}')

# Drop the duplicates (the first occurrence of each row is kept) and show the
# frame's shape before and after. Reassignment is used instead of
# inplace=True; the original index labels are preserved either way.
print(f'with duplicates:{df.shape}')
df = df.drop_duplicates()
print(f'without duplicates:{df.shape}')

# Independent snapshot of the de-duplicated data.
df_copy = df.copy()
# Summary statistics for every column (numeric and non-numeric alike). As a
# bare expression the table is only rendered when it ends a notebook cell.
df.describe(include='all')

# Column dtypes, non-null counts and memory usage (printed by info() itself).
df.info()

# Bar chart: number of rows in each class.
class_counts = df["class"].value_counts()
class_counts.plot.bar()

# Bar chart: mean body-fat percentage per class.
mean_body_fat_by_class = df.groupby("class")[["body fat_%"]].mean()
mean_body_fat_by_class.plot.bar()
# Switch matplotlib/seaborn to seaborn's default theme.
sns.set_theme()

# Scatter plot of broad jump against sit-ups for the first 200 rows,
# coloured by class.
sns.relplot(
    data=df.head(200),
    x='broad jump_cm',
    y='sit-ups counts',
    hue='class',
)

# Body fat against BMI, one figure per gender ('M' first, then 'F'), each
# limited to the first 200 rows of that gender and coloured by class.
for gender_code in ('M', 'F'):
    sns.relplot(
        data=df[df['gender'] == gender_code].head(200),
        x='body fat_%',
        y='BMI',
        hue='class',
    )

# Columns drawn side by side in the box plot.
box_plot_columns = [
    'height_cm',
    'weight_kg',
    'body fat_%',
    'diastolic',
    'systolic',
    'gripForce',
    'sit and bend forward_cm',
    'sit-ups counts',
    'broad jump_cm',
    'BMI',
]
# Left as a bare expression: a notebook renders the figure when this is the
# last expression of its cell.
px.box(df, y=box_plot_columns)
# Disabled: the pair plot below takes too long to render.
#sns.pairplot(data=df.drop(columns=["gender"]).head(500), hue="class")