#!/usr/bin/env python
# coding: utf-8

# Exploratory analysis of the body-performance dataset: derive BMI, drop
# duplicate rows, then chart class balance and a few feature relationships.
# NOTE(review): the `# In[ ]:` markers suggest this file was exported from a
# Jupyter notebook; bare trailing expressions (e.g. `df.head()`) only render
# their result when run as notebook cells -- confirm how this file is used.

# In[ ]:


import pandas as pd
import plotly.express as px
import seaborn as sns

df = pd.read_csv(r'./body_performance.csv')

# BMI = weight [kg] / (height [m])^2. Height is stored in centimetres, so the
# squared height is scaled by 0.01^2 = 0.0001 to convert cm^2 to m^2.
height_cm = df['height_cm']
df['BMI'] = df['weight_kg'] / (0.0001 * height_cm * height_cm)
df.head()


# In[ ]:


# Count fully duplicated rows and report the number before dropping them, so
# the result of the check is actually visible in the output.
duplicate_count = df.duplicated().sum()
print(f'duplicate rows:{duplicate_count}')
print(f'with duplicates:{df.shape}')
df = df.drop_duplicates()
print(f'without duplicates:{df.shape}')

# Independent snapshot of the de-duplicated frame (not referenced again in the
# visible part of this script).
df_copy = df.copy()


# In[ ]:


df.describe(include='all')


# In[ ]:


df.info()


# In[ ]:


# Number of rows per performance class.
df["class"].value_counts().plot(kind="bar")


# In[ ]:


# Mean body-fat percentage for each performance class.
df[["class", "body fat_%"]].groupby("class").mean().plot(kind="bar")


# In[ ]:


sns.set_theme()

# Scatter plots below use only the first 200 rows of the selected data.
sns.relplot(
    data=df.head(200),
    x='broad jump_cm',
    y='sit-ups counts',
    hue='class',
)


# In[ ]:


# Body fat vs. BMI for the first 200 male rows.
sns.relplot(
    data=df[df['gender'] == 'M'].head(200),
    x='body fat_%',
    y='BMI',
    hue='class',
)


# In[ ]:


# Body fat vs. BMI for the first 200 female rows.
sns.relplot(
    data=df[df['gender'] == 'F'].head(200),
    x='body fat_%',
    y='BMI',
    hue='class',
)


# In[ ]:


# Box plots of the numeric measurements, drawn in a single figure.
px.box(
    df,
    y=[
        'height_cm',
        'weight_kg',
        'body fat_%',
        'diastolic',
        'systolic',
        'gripForce',
        'sit and bend forward_cm',
        'sit-ups counts',
        'broad jump_cm',
        'BMI',
    ],
)


# In[ ]:


# Disabled on purpose: the pair plot takes too long to render, even when
# limited to the first 500 rows.
#sns.pairplot(data=df.drop(columns=["gender"]).head(500), hue="class")


# In[ ]:


# In[ ]: