#!/usr/bin/env python
# coding: utf-8
"""Prepare train/test CSV splits from the body-performance dataset.

Reads ``data/bodyPerformance.csv``, keeps a fixed subset of columns,
encodes ``gender`` numerically, drops incomplete rows, and writes
``X_train.csv``, ``X_test.csv``, ``y_train.csv`` and ``y_test.csv`` to the
current working directory.

The source notebook contained a commented-out cell that extracted the data:
    unzip -o body-performance-data.zip
"""

import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('data/bodyPerformance.csv')

# Target column first, followed by the feature columns.
cols = [
    'gender',
    'height_cm',
    'weight_kg',
    'body fat_%',
    'sit-ups counts',
    'broad jump_cm',
]
# Explicit copy so that the column assignment below acts on an independent
# frame rather than on a selection of the frame returned by read_csv.
df = df[cols].copy()

# male - 0, female - 1
# Assign the result back instead of calling ``replace(..., inplace=True)`` on
# ``df['gender']``: the in-place form is chained assignment and, with pandas
# Copy-on-Write, only modifies a temporary Series and leaves ``df`` unchanged.
df['gender'] = df['gender'].replace({'M': 0, 'F': 1})

# Drop every row that has a missing value in any of the selected columns.
df = df.dropna(how='any')

# Features are all selected columns except the target.
X = df[[name for name in cols if name != 'gender']]
# Double brackets keep y a one-column DataFrame, so y_*.csv gets a
# ``gender`` header row.
y = df[['gender']]

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train.to_csv('X_train.csv', index=False)
X_test.to_csv('X_test.csv', index=False)
y_train.to_csv('y_train.csv', index=False)
y_test.to_csv('y_test.csv', index=False)