ium_444421/classification_net.ipynb at 4a8da732fc9e0dd687eea4476e6c492011ad8f46

2022-04-23 23:27:19 +02:00

14 KiB

Raw Blame History

!kaggle datasets download -d kukuroo3/body-performance-data

!unzip -o body-performance-data.zip

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import torch
from torch import nn, optim

# Load the body-performance table into a DataFrame; the CSV is presumably the
# file extracted from the Kaggle archive downloaded above (confirm file name).
df = pd.read_csv('bodyPerformance.csv')
# Notebook display: (rows, columns) of the loaded frame.
df.shape

(13393, 12)

# Notebook display: preview the first rows to check column names and formats.
df.head()

	age	gender	height_cm	weight_kg	body fat_%	diastolic	systolic	gripForce	sit and bend forward_cm	sit-ups counts	broad jump_cm	class
0	27.0	M	172.3	75.24	21.3	80.0	130.0	54.9	18.4	60.0	217.0	C
1	25.0	M	165.0	55.80	15.7	77.0	126.0	36.4	16.3	53.0	229.0	A
2	31.0	M	179.6	78.00	20.1	92.0	152.0	44.8	12.0	49.0	181.0	C
3	32.0	M	174.5	71.10	18.4	76.0	147.0	41.4	15.2	53.0	219.0	B
4	28.0	M	173.8	67.70	17.1	70.0	127.0	43.5	27.1	45.0	217.0	B

# Keep only the target (gender) and the body measurements used as features.
cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']
# Take an explicit copy so the column assignment below writes to an
# independent frame instead of a slice of the original one.
df = df[cols].copy()

# male - 0, female - 1
# Assign the encoded column back rather than calling
# `df['gender'].replace(..., inplace=True)`: the in-place call operates on a
# temporary Series, which triggers chained-assignment warnings and does not
# modify `df` at all when pandas Copy-on-Write is active.
# Any value other than 'M'/'F' becomes NaN here and is removed by dropna.
df['gender'] = df['gender'].map({'M': 0, 'F': 1})
# Drop incomplete rows, then force an integer label column (map yields floats
# whenever it had to produce NaN for some row).
df = df.dropna(how='any').astype({'gender': 'int64'})

# Notebook display: fraction of rows per gender code (class balance check).
df['gender'].value_counts() / len(df)

0    0.632196
1    0.367804
Name: gender, dtype: float64

# Inputs are the five body measurements; the target is the encoded gender,
# kept as a one-column frame so it is split row-for-row with the features.
X = df.loc[:, ['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']]
y = df.loc[:, ['gender']]

# Reproducible 80/20 hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Convert each partition to float tensors; the single-column label arrays are
# squeezed down to one dimension.
X_train = torch.from_numpy(np.array(X_train)).float()
y_train = torch.from_numpy(y_train.values).float().squeeze()

X_test = torch.from_numpy(np.array(X_test)).float()
y_test = torch.from_numpy(y_test.values).float().squeeze()

# Sanity check of the resulting tensor shapes.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

torch.Size([10714, 5]) torch.Size([10714])
torch.Size([2679, 5]) torch.Size([2679])

class Net(nn.Module):
    """Small fully connected binary classifier.

    Architecture: n_features -> 5 -> 3 -> 1, with ReLU after the first two
    layers and a sigmoid on the output, so ``forward`` returns values in
    [0, 1] suitable for ``nn.BCELoss``.

    Args:
        n_features: Number of input features per sample.
    """

    def __init__(self, n_features):
        super().__init__()
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return sigmoid outputs with a trailing dimension of 1 for inputs ``x``."""
        # torch.relu is used instead of F.relu: `torch.nn.functional` is not
        # imported as `F` in this file, so F.relu raised NameError.
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Model sized to the number of feature columns, binary cross-entropy loss on
# the sigmoid outputs, and Adam over all model parameters.
net = Net(X_train.shape[1]).to(device)
criterion = nn.BCELoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Keep the data on the same device as the model.
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

def calculate_accuracy(y_true, y_pred):
    """Return the fraction of predictions matching ``y_true`` as a 0-dim tensor.

    ``y_pred`` holds probabilities; values >= 0.5 count as the positive class.
    It is flattened before comparison, so a trailing singleton dimension is fine.
    """
    hard_labels = (y_pred >= .5).view(-1)
    n_correct = (y_true == hard_labels).sum().float()
    return n_correct / len(y_true)

def round_tensor(t, decimal_places=3):
    """Return the single value held by tensor ``t`` as a Python number rounded to ``decimal_places``."""
    scalar = t.item()
    return round(scalar, decimal_places)
# Full-batch training: every epoch runs one forward/backward pass over the
# whole training tensor followed by a single optimizer step.
for epoch in range(1000):
    y_pred = net(X_train)
    y_pred = torch.squeeze(y_pred)
    train_loss = criterion(y_pred, y_train)

    if epoch % 100 == 0:
        # Reporting only: run under no_grad so the test-set forward pass and
        # the metric computations do not build an autograd graph. The values
        # printed are those of the model *before* this epoch's update.
        with torch.no_grad():
            train_acc = calculate_accuracy(y_train, y_pred)
            y_test_pred = torch.squeeze(net(X_test))
            test_loss = criterion(y_test_pred, y_test)
            test_acc = calculate_accuracy(y_test, y_test_pred)
        print(
f'''epoch {epoch}
Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
Test  set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}
''')

    # Clear gradients left from the previous step before backpropagating.
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

epoch 0
Train set - loss: 1.005, accuracy: 0.37
Test  set - loss: 1.018, accuracy: 0.358

epoch 100
Train set - loss: 0.677, accuracy: 0.743
Test  set - loss: 0.679, accuracy: 0.727

epoch 200
Train set - loss: 0.636, accuracy: 0.79
Test  set - loss: 0.64, accuracy: 0.778

epoch 300
Train set - loss: 0.568, accuracy: 0.839
Test  set - loss: 0.577, accuracy: 0.833

epoch 400
Train set - loss: 0.504, accuracy: 0.885
Test  set - loss: 0.514, accuracy: 0.877

epoch 500
Train set - loss: 0.441, accuracy: 0.922
Test  set - loss: 0.45, accuracy: 0.913

epoch 600
Train set - loss: 0.388, accuracy: 0.944
Test  set - loss: 0.396, accuracy: 0.938

epoch 700
Train set - loss: 0.353, accuracy: 0.954
Test  set - loss: 0.359, accuracy: 0.949

epoch 800
Train set - loss: 0.327, accuracy: 0.958
Test  set - loss: 0.333, accuracy: 0.953

epoch 900
Train set - loss: 0.306, accuracy: 0.961
Test  set - loss: 0.312, accuracy: 0.955

# torch.save(net, 'model.pth')

# net = torch.load('model.pth')

# Index matches the label encoding: 0 -> 'Male', 1 -> 'Female'.
classes = ['Male', 'Female']
# Inference only: skip autograd bookkeeping for the test-set forward pass.
with torch.no_grad():
    y_pred = net(X_test)
# Threshold the sigmoid outputs at 0.5 and flatten to one dimension; both
# tensors are moved to the CPU before being handed to scikit-learn.
y_pred = y_pred.ge(.5).view(-1).cpu()
y_test = y_test.cpu()
print(classification_report(y_test, y_pred, target_names=classes))

              precision    recall  f1-score   support

        Male       0.97      0.96      0.96      1720
      Female       0.93      0.94      0.94       959

    accuracy                           0.95      2679
   macro avg       0.95      0.95      0.95      2679
weighted avg       0.95      0.95      0.95      2679

# Write one predicted class name per line. The generator keeps its loop
# variable local, so the module-level label frame `y` is no longer overwritten
# (the original `for y in y_pred` loop rebound it to a tensor element).
with open('test_out.csv', 'w') as file:
    file.writelines(f'{classes[int(label)]}\n' for label in y_pred.tolist())

14 KiB Raw Blame History

14 KiB

Raw Blame History