14 KiB
14 KiB
# IPython shell escape: download the dataset archive with the Kaggle CLI.
!kaggle datasets download -d kukuroo3/body-performance-data
# Unpack the archive; -o overwrites existing files without prompting.
!unzip -o body-performance-data.zip
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import torch
from torch import nn, optim
# Load the dataset CSV into a DataFrame.
df = pd.read_csv('bodyPerformance.csv')
# Notebook display: (rows, columns) of the loaded table.
df.shape
(13393, 12)
# Notebook display: preview the first rows of the table.
df.head()
age | gender | height_cm | weight_kg | body fat_% | diastolic | systolic | gripForce | sit and bend forward_cm | sit-ups counts | broad jump_cm | class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 27.0 | M | 172.3 | 75.24 | 21.3 | 80.0 | 130.0 | 54.9 | 18.4 | 60.0 | 217.0 | C |
1 | 25.0 | M | 165.0 | 55.80 | 15.7 | 77.0 | 126.0 | 36.4 | 16.3 | 53.0 | 229.0 | A |
2 | 31.0 | M | 179.6 | 78.00 | 20.1 | 92.0 | 152.0 | 44.8 | 12.0 | 49.0 | 181.0 | C |
3 | 32.0 | M | 174.5 | 71.10 | 18.4 | 76.0 | 147.0 | 41.4 | 15.2 | 53.0 | 219.0 | B |
4 | 28.0 | M | 173.8 | 67.70 | 17.1 | 70.0 | 127.0 | 43.5 | 27.1 | 45.0 | 217.0 | B |
# Keep the target column (gender) plus the five measurements used as features.
cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']
# Take an explicit copy so the column assignment below writes to this frame
# and not to a slice of the originally loaded one.
df = df[cols].copy()
# male - 0, female - 1
# Assign the encoded column back instead of calling an in-place method on the
# `df['gender']` selection: in-place updates through a selection are chained
# assignment, which warns on current pandas and does not modify `df` under
# Copy-on-Write. Any label other than 'M'/'F' becomes NaN here and is removed
# by the dropna() on the next line.
df['gender'] = df['gender'].map({'M': 0, 'F': 1})
df = df.dropna(how='any')
# Notebook display: fraction of rows per gender code (class balance).
df.gender.value_counts() / df.shape[0]
0 0.632196 1 0.367804 Name: gender, dtype: float64
# Features are the five measurements; the target stays a one-column frame so
# that train_test_split returns it as a frame as well.
X = df[['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']]
y = df[['gender']]

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Convert features to float32 tensors.
X_train = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
X_test = torch.tensor(X_test.to_numpy(), dtype=torch.float32)

# Convert targets to float32 tensors and drop the singleton column dimension.
y_train = torch.tensor(y_train.to_numpy(), dtype=torch.float32).squeeze()
y_test = torch.tensor(y_test.to_numpy(), dtype=torch.float32).squeeze()

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
torch.Size([10714, 5]) torch.Size([10714]) torch.Size([2679, 5]) torch.Size([2679])
class Net(nn.Module):
    """Small fully connected binary classifier.

    Three linear layers (n_features -> 5 -> 3 -> 1) with ReLU activations
    between them and a sigmoid on the output, so `forward` returns one
    value in [0, 1] per input row.
    """

    def __init__(self, n_features):
        """Create the layers.

        Args:
            n_features: Size of the last dimension of the input tensor.
        """
        super().__init__()
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return sigmoid outputs with a trailing dimension of size 1."""
        # torch.relu is used because `torch` is already imported by the file;
        # the previous `F.relu` referenced a name that was never imported.
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Data and model must live on the same device.
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

# Move the model to its device BEFORE constructing the optimizer, as the
# torch.optim documentation advises, so the optimizer is built from the
# parameters as they exist on the target device.
net = Net(X_train.shape[1]).to(device)

# Binary cross-entropy on probabilities; Net already applies a sigmoid.
criterion = nn.BCELoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of predictions that agree with the labels.

    Args:
        y_true: Tensor of 0/1 labels, one per sample.
        y_pred: Tensor of scores; values >= 0.5 count as class 1. It is
            flattened before comparison.

    Returns:
        A scalar float tensor: matching entries divided by ``len(y_true)``.
    """
    thresholded = y_pred.ge(.5)
    flat_labels = thresholded.view(-1)
    n_correct = flat_labels.eq(y_true).sum().float()
    return n_correct / len(y_true)
def round_tensor(t, decimal_places=3):
    """Return the single value held by tensor *t*, rounded for display.

    Args:
        t: A one-element tensor.
        decimal_places: Number of decimal digits to keep.

    Returns:
        The Python number from ``t.item()`` rounded to *decimal_places*.
    """
    scalar = t.item()
    return round(scalar, decimal_places)
for epoch in range(1000):
    # Forward pass over the whole training set (one full batch per epoch).
    y_pred = net(X_train)
    y_pred = torch.squeeze(y_pred)
    train_loss = criterion(y_pred, y_train)

    # Every 100 epochs, report metrics on both splits.
    if epoch % 100 == 0:
        # Reporting only: disable autograd so the test-set forward pass does
        # not build a computation graph that is never back-propagated.
        with torch.no_grad():
            train_acc = calculate_accuracy(y_train, y_pred)
            y_test_pred = net(X_test)
            y_test_pred = torch.squeeze(y_test_pred)
            test_loss = criterion(y_test_pred, y_test)
            test_acc = calculate_accuracy(y_test, y_test_pred)
        print(
f'''epoch {epoch}
Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
Test set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}
''')

    # Backward pass and parameter update; gradients are cleared first because
    # PyTorch accumulates them across backward() calls.
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
epoch 0 Train set - loss: 1.005, accuracy: 0.37 Test set - loss: 1.018, accuracy: 0.358 epoch 100 Train set - loss: 0.677, accuracy: 0.743 Test set - loss: 0.679, accuracy: 0.727 epoch 200 Train set - loss: 0.636, accuracy: 0.79 Test set - loss: 0.64, accuracy: 0.778 epoch 300 Train set - loss: 0.568, accuracy: 0.839 Test set - loss: 0.577, accuracy: 0.833 epoch 400 Train set - loss: 0.504, accuracy: 0.885 Test set - loss: 0.514, accuracy: 0.877 epoch 500 Train set - loss: 0.441, accuracy: 0.922 Test set - loss: 0.45, accuracy: 0.913 epoch 600 Train set - loss: 0.388, accuracy: 0.944 Test set - loss: 0.396, accuracy: 0.938 epoch 700 Train set - loss: 0.353, accuracy: 0.954 Test set - loss: 0.359, accuracy: 0.949 epoch 800 Train set - loss: 0.327, accuracy: 0.958 Test set - loss: 0.333, accuracy: 0.953 epoch 900 Train set - loss: 0.306, accuracy: 0.961 Test set - loss: 0.312, accuracy: 0.955
# torch.save(net, 'model.pth')
# net = torch.load('model.pth')
# Index in this list equals the encoded label (0 -> Male, 1 -> Female).
classes = ['Male', 'Female']

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    y_pred = net(X_test)

# Threshold the sigmoid outputs at 0.5, flatten, and move to the CPU so that
# scikit-learn can consume the values.
y_pred = y_pred.ge(.5).view(-1).cpu()
y_test = y_test.cpu()
print(classification_report(y_test, y_pred, target_names=classes))
precision recall f1-score support Male 0.97 0.96 0.96 1720 Female 0.93 0.94 0.94 959 accuracy 0.95 2679 macro avg 0.95 0.95 0.95 2679 weighted avg 0.95 0.95 0.95 2679
# Write one predicted class name per line. Iterating over tolist() yields
# plain Python values (no per-element tensor .item() call), and the generator
# keeps its loop variable local so the module-level label frame `y` is not
# overwritten. The content is ASCII, so the explicit encoding does not change
# the bytes written.
with open('test_out.csv', 'w', encoding='utf-8') as file:
    file.writelines(f'{classes[int(flag)]}\n' for flag in y_pred.tolist())