update python file

This commit is contained in:
s487178 2023-04-05 19:40:37 +02:00
parent e3e20473d4
commit 05c892510a

View File

@ -7,13 +7,13 @@
import pandas as pd import pandas as pd
import plotly.express as px import plotly.express as px
import seaborn as sns import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
df = pd.read_csv(r'./body_performance.csv') df = pd.read_csv(r'.\body_performance.csv')
#print(df.dtypes)
#df['BMI'] = float(df['weight_kg'])/(float(df['height_cm'])*0.01)^2
df['BMI'] = df['weight_kg']/(0.0001*df['height_cm']*df['height_cm']) df['BMI'] = df['weight_kg']/(0.0001*df['height_cm']*df['height_cm'])
df.head() print(df.head())
# In[ ]: # In[ ]:
@ -29,9 +29,39 @@ df_copy = df.copy()
# In[ ]: # In[ ]:
# Split the data frame into train / test / validation subsets.
# Sizes are absolute row counts derived from the number of non-null "age"
# values: 20% of the frame is held out first, then that hold-out is halved
# into the final test and validation sets. random_state=1 keeps the split
# reproducible between runs.
holdout_size = int(df["age"].count() * 0.2)
body_train, body_test = train_test_split(df, test_size=holdout_size, random_state=1)

valid_size = int(body_test["age"].count() * 0.5)
body_test, body_valid = train_test_split(body_test, test_size=valid_size, random_state=1)

# Report how many rows (non-null "age" values) ended up in each subset.
for label, frame in (
    ("number of elements in data frame", df),
    ("train", body_train),
    ("test", body_test),
    ("valid", body_valid),
):
    print("{}: {}".format(label, frame["age"].count()))
# In[ ]:
print(df.describe(include='all'))
# NOTE: 'sit and bend forward_cm' contains negative values!
# In[ ]:
# Rescale the numeric feature columns in place with min-max normalisation.
# NOTE(review): this rescales `df` only; body_train / body_test / body_valid
# were split off from `df` earlier and presumably still hold the unscaled
# values - confirm this is intended.

# Columns scaled with MinMaxScaler's default feature range.
default_range_columns = [
    'age', 'height_cm', 'weight_kg', 'body fat_%',
    'diastolic', 'systolic', 'gripForce', 'sit-ups counts',
    'broad jump_cm', 'BMI',
]
scaler = MinMaxScaler()
df[default_range_columns] = scaler.fit_transform(df[default_range_columns])

# 'sit and bend forward_cm' gets its own scaler with an explicit (-1, 1)
# target range. `scaler` is rebound here, so after this block it refers to
# the (-1, 1) scaler fitted on this single column.
signed_column = 'sit and bend forward_cm'
scaler = MinMaxScaler(feature_range=(-1, 1))
df[signed_column] = scaler.fit_transform(df[[signed_column]])
df.describe(include='all') df.describe(include='all')
# In[ ]: # In[ ]:
@ -41,48 +71,70 @@ df.info()
# In[ ]: # In[ ]:
df["class"].value_counts().plot(kind="bar") print('Each class in data frame: \n{}'.format(df['class'].value_counts()))
# Print the class distribution of each split, in train / test / valid order.
for split_name, split_frame in (
    ('train', body_train),
    ('test', body_test),
    ('valid', body_valid),
):
    print('Each class in {} data: \n{}'.format(split_name, split_frame['class'].value_counts()))
# In[ ]: # In[ ]:
df[["class","body fat_%"]].groupby("class").mean().plot(kind="bar")
# In[ ]: # In[ ]:
sns.set_theme()
sns.relplot(data = df.head(200), x = 'broad jump_cm', y = 'sit-ups counts', hue = 'class')
# In[ ]: # In[ ]:
sns.relplot(data = df[df['gender'] == 'M'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class') #df["class"].value_counts().plot(kind="bar")
# In[ ]: # In[ ]:
sns.relplot(data = df[df['gender'] == 'F'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class') #df[["class","body fat_%"]].groupby("class").mean().plot(kind="bar")
# In[ ]: # In[ ]:
px.box(df, y=['height_cm', #sns.set_theme()
'weight_kg',
'body fat_%', #sns.relplot(data = df.head(200), x = 'broad jump_cm', y = 'sit-ups counts', hue = 'class')
'diastolic',
'systolic',
'gripForce', # In[ ]:
'sit and bend forward_cm',
'sit-ups counts',
'broad jump_cm', #sns.relplot(data = df[df['gender'] == 'M'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class')
'BMI'])
# In[ ]:
#sns.relplot(data = df[df['gender'] == 'F'].head(200), x = 'body fat_%', y = 'BMI', hue = 'class')
# In[ ]:
#px.box(df, y=['height_cm',
# 'weight_kg',
# 'body fat_%',
# 'diastolic',
# 'systolic',
# 'gripForce',
# 'sit and bend forward_cm',
# 'sit-ups counts',
# 'broad jump_cm',
# 'BMI'])
# In[ ]: # In[ ]: