106 lines
1.5 KiB
Python
106 lines
1.5 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# In[ ]:
|
|
|
|
|
|
import pandas as pd
|
|
import plotly.express as px
|
|
import seaborn as sns
|
|
|
|
# Load the raw measurements; the path is relative to the current working directory.
df = pd.read_csv("./body_performance.csv")

# BMI = weight / height^2 with height in metres.  The height column is named
# in centimetres, so the 0.0001 factor (0.01 squared) converts cm^2 to m^2.
height_sq_m2 = 0.0001 * df["height_cm"] * df["height_cm"]
df["BMI"] = df["weight_kg"] / height_sq_m2

# Preview the first rows, including the new BMI column.
df.head()
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Report how many rows are exact repeats of an earlier row before removing
# them; the count is bound to a name so it is actually shown instead of being
# evaluated and discarded.
duplicate_count = df.duplicated().sum()
print(f'duplicate rows:{duplicate_count}')

print(f'with duplicates:{df.shape}')
# Remove the repeated rows in place so every later cell sees the cleaned frame.
df.drop_duplicates(inplace=True)
print(f'without duplicates:{df.shape}')

# Independent snapshot of the de-duplicated data; later changes to df do not
# affect it.
df_copy = df.copy()
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Summary statistics for every column; include='all' also covers the
# non-numeric columns rather than only the numeric ones.
df.describe(include='all')
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Print a concise overview of the frame: index range, column dtypes,
# non-null counts and memory usage.
df.info()
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Bar chart of how many rows fall into each value of the "class" column.
class_counts = df["class"].value_counts()
class_counts.plot(kind="bar")
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Bar chart of the mean "body fat_%" within each "class" group.
mean_body_fat = df.groupby("class")[["body fat_%"]].mean()
mean_body_fat.plot(kind="bar")
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Switch on seaborn's default theme for this and all following plots.
sns.set_theme()

# Relational plot of broad jump against sit-ups, coloured by class.
# Only the first 200 rows are drawn.
first_rows = df.head(200)
sns.relplot(
    data=first_rows,
    x="broad jump_cm",
    y="sit-ups counts",
    hue="class",
)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Body fat against BMI, coloured by class, for the first 200 rows whose
# gender value is 'M'.
gender_m_mask = df["gender"] == "M"
sns.relplot(
    data=df.loc[gender_m_mask].head(200),
    x="body fat_%",
    y="BMI",
    hue="class",
)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Body fat against BMI, coloured by class, for the first 200 rows whose
# gender value is 'F'.
gender_f_mask = df["gender"] == "F"
sns.relplot(
    data=df.loc[gender_f_mask].head(200),
    x="body fat_%",
    y="BMI",
    hue="class",
)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Columns to draw as box plots on a single figure, in display order.
box_columns = [
    "height_cm",
    "weight_kg",
    "body fat_%",
    "diastolic",
    "systolic",
    "gripForce",
    "sit and bend forward_cm",
    "sit-ups counts",
    "broad jump_cm",
    "BMI",
]

# Passing a list as y gives one box per listed column.
px.box(df, y=box_columns)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# This takes too long to run, so it is left disabled.
|
|
#sns.pairplot(data=df.drop(columns=["gender"]).head(500), hue="class")
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|