#!/usr/bin/env python
# coding: utf-8
"""Train a small feed-forward network that predicts gender from body data.

The script reads ``bodyPerformance.csv`` (extracted from
``body-performance-data.zip`` when that archive is present in the working
directory), fits a binary classifier on five numeric columns, prints a
classification report for the held-out split and writes the predicted class
names to ``test_out.csv``, one per line.

The archive can be fetched with:
    kaggle datasets download -d kukuroo3/body-performance-data
"""

import zipfile
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import torch
from torch import nn, optim
import torch.nn.functional as F


# --- Dataset -----------------------------------------------------------------

archive_path = Path('body-performance-data.zip')

# Stand-in for the notebook's `unzip -o`: existing files are overwritten.
# A missing archive is skipped rather than treated as fatal so that an
# already extracted CSV can still be used.
if archive_path.exists():
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall()

df = pd.read_csv('bodyPerformance.csv')
print(df.shape)
print(df.head())

cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts',
        'broad jump_cm']
# Work on an explicit copy so the column assignment below modifies this frame
# and not a view of the frame returned by read_csv.
df = df[cols].copy()

# male - 0, female - 1
df['gender'] = df['gender'].map({'M': 0, 'F': 1})
df = df.dropna(how='any')

# Share of each class in the cleaned data.
print(df['gender'].value_counts() / df.shape[0])

X = df[['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts',
        'broad jump_cm']]
y = df[['gender']]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)


# --- Tensors -----------------------------------------------------------------

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


def _to_tensor(frame):
    """Convert a DataFrame to a float32 tensor placed on ``device``."""
    return torch.from_numpy(np.asarray(frame, dtype=np.float32)).to(device)


X_train = _to_tensor(X_train)
X_test = _to_tensor(X_test)
# Targets arrive as (n, 1) frames; drop only that column axis so a split with
# a single row still yields a 1-D tensor.
y_train = _to_tensor(y_train).squeeze(1)
y_test = _to_tensor(y_test).squeeze(1)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)


# --- Model -------------------------------------------------------------------

class Net(nn.Module):
    """Three-layer perceptron (n_features -> 5 -> 3 -> 1) with sigmoid output."""

    def __init__(self, n_features):
        """Create the layers for inputs with ``n_features`` columns."""
        super().__init__()
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return a value in (0, 1) for every row of ``x``, shape (n, 1)."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))


# Move the model to its device before the optimizer is built on its
# parameters.
net = Net(X_train.shape[1]).to(device)
criterion = nn.BCELoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)


# --- Training ----------------------------------------------------------------

def calculate_accuracy(y_true, y_pred):
    """Return the fraction of predictions that match ``y_true``.

    ``y_pred`` holds sigmoid outputs; values >= 0.5 count as class 1.
    The result is a 0-d float tensor.
    """
    predicted = y_pred.ge(.5).view(-1).to(y_true.dtype)
    return (y_true == predicted).sum().float() / len(y_true)


def round_tensor(t, decimal_places=3):
    """Return the single value held by tensor ``t`` as a rounded float."""
    return round(t.item(), decimal_places)


for epoch in range(1000):
    y_pred = net(X_train).squeeze(-1)
    train_loss = criterion(y_pred, y_train)

    if epoch % 100 == 0:
        train_acc = calculate_accuracy(y_train, y_pred)

        # Evaluation only: no autograd graph is needed for the test split.
        with torch.no_grad():
            y_test_pred = net(X_test).squeeze(-1)
            test_loss = criterion(y_test_pred, y_test)
            test_acc = calculate_accuracy(y_test, y_test_pred)

        print(
            f'epoch {epoch}\n'
            f'Train set - loss: {round_tensor(train_loss)}, '
            f'accuracy: {round_tensor(train_acc)}\n'
            f'Test set - loss: {round_tensor(test_loss)}, '
            f'accuracy: {round_tensor(test_acc)}\n'
        )

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

# To persist / restore the trained model:
#   torch.save(net, 'model.pth')
#   net = torch.load('model.pth')


# --- Evaluation and output ---------------------------------------------------

# Index i is the display name of label i (0 = male, 1 = female).
classes = ['Male', 'Female']

with torch.no_grad():
    y_pred = net(X_test).ge(.5).view(-1).long().cpu().numpy()
y_test = y_test.long().cpu().numpy()

print(classification_report(y_test, y_pred, target_names=classes))

# One predicted class name per line, in test-set order.
with open('test_out.csv', 'w', encoding='utf-8') as out_file:
    out_file.writelines(f'{classes[label]}\n' for label in y_pred)