Update files

2022-05-29 13:46:40 +02:00 · 2022-05-29 13:46:40 +02:00 · e8af09d8a7
commit e8af09d8a7
parent 4905e68ac6
8 changed files with 145 additions and 195 deletions
--- a/.dvc/config
+++ b/.dvc/config
@ -0,0 +1,6 @@
 [core]
    remote = ium_ssh_remote
 ['remote "my_local_remote"']
    url = /dvcstore
 ['remote "ium_ssh_remote"']
    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.gitignore
+++ b/.gitignore
@ -12,3 +12,8 @@ ipython_config.py
 # Remove previous ipynb_checkpoints
 #   git rm -r .ipynb_checkpoints/
 /X_train.csv
 /X_test.csv
 /y_train.csv
 /y_test.csv
 /model.pth
--- a/3
+++ b/3
@ -13,5 +13,6 @@ WORKDIR /app
 COPY ./body-performance-data.zip ./
-COPY ./classification_net.py ./
+COPY ./prepare_datasets.py ./
 COPY ./train.py ./
--- a/classification_net.py
+++ b/classification_net.py
@ -1,192 +0,0 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 # get_ipython().system('kaggle datasets download -d kukuroo3/body-performance-data')
 # In[ ]:
 # Unpack the dataset archive with the standard library: get_ipython() is
 # only defined inside IPython, so the former shell call raised NameError
 # when this exported notebook was run with plain `python`.
 import zipfile
 with zipfile.ZipFile('body-performance-data.zip') as archive:
     # Like `unzip -o`, extractall() overwrites files that already exist.
     archive.extractall()
 # In[114]:
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import classification_report
 import torch
 from torch import nn, optim
 import torch.nn.functional as F
 # In[115]:
 # Load the raw body-performance table from the working directory.
 df = pd.read_csv('bodyPerformance.csv')
 df.shape
 # In[116]:
 df.head()
 # In[117]:
 cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']
 # Take an explicit copy so the column assignment below writes to an
 # independent frame rather than to a selection of the original one.
 df = df[cols].copy()
 # male - 0, female - 1
 # Assign the encoded column back instead of calling an in-place method on
 # df['gender']: that chained form acts on a temporary object and is not
 # guaranteed to update df. Values other than 'M'/'F' become NaN here and
 # are removed by the dropna below.
 df['gender'] = df['gender'].map({'M': 0, 'F': 1})
 df = df.dropna(how='any')
 # In[118]:
 df.gender.value_counts() / df.shape[0]
 # In[119]:
 X = df[['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']]
 y = df[['gender']]
 # Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 # In[120]:
 # Convert features and labels to float32 tensors; squeeze drops the
 # size-1 column dimension of the single-column label frames.
 X_train = torch.from_numpy(np.array(X_train)).float()
 y_train = torch.squeeze(torch.from_numpy(y_train.values).float())
 X_test = torch.from_numpy(np.array(X_test)).float()
 y_test = torch.squeeze(torch.from_numpy(y_test.values).float())
 print(X_train.shape, y_train.shape)
 print(X_test.shape, y_test.shape)
 # In[121]:
 class Net(nn.Module):
     """Fully connected binary classifier: n_features -> 5 -> 3 -> 1."""
     def __init__(self, n_features):
         """Create the three linear layers; `n_features` is the input width."""
         super().__init__()
         self.fc1 = nn.Linear(n_features, 5)
         self.fc2 = nn.Linear(5, 3)
         self.fc3 = nn.Linear(3, 1)
     def forward(self, x):
         """Apply fc1 and fc2 with ReLU, then fc3 followed by a sigmoid."""
         hidden = torch.relu(self.fc1(x))
         hidden = torch.relu(self.fc2(hidden))
         return torch.sigmoid(self.fc3(hidden))
 # Size the network to the number of feature columns in X_train.
 net = Net(X_train.shape[1])
 # In[122]:
 # Binary cross-entropy, applied to the sigmoid output of the network.
 criterion = nn.BCELoss()
 # In[123]:
 optimizer = optim.Adam(net.parameters(), lr=0.001)
 # In[124]:
 # Use the first CUDA device when one is available, the CPU otherwise.
 device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
 # In[125]:
 # Move all four data tensors to the selected device.
 X_train, y_train, X_test, y_test = (
     tensor.to(device) for tensor in (X_train, y_train, X_test, y_test)
 )
 # In[126]:
 net, criterion = net.to(device), criterion.to(device)
 # In[127]:
 def calculate_accuracy(y_true, y_pred):
     """Return the fraction of entries in `y_true` matched by `y_pred`.
     `y_pred` is flattened and thresholded at 0.5 (values >= 0.5 count as
     the positive class). The result is a 0-dimensional float tensor.
     """
     hits = y_true == (y_pred >= .5).view(-1)
     return hits.sum().float() / len(y_true)
 # In[128]:
 def round_tensor(t, decimal_places=3):
     """Return the single value held by tensor `t`, rounded to `decimal_places`."""
     scalar = t.item()
     return round(scalar, decimal_places)
 # Full-batch training for 1000 epochs; loss and accuracy on both splits are
 # printed on every 100th epoch.
 for epoch in range(1000):
     y_pred = net(X_train)
     y_pred = torch.squeeze(y_pred)
     train_loss = criterion(y_pred, y_train)
     if epoch % 100 == 0:
         train_acc = calculate_accuracy(y_train, y_pred)
         # The test-set pass is for reporting only, so it runs without
         # autograd tracking to avoid building a graph that is never used.
         with torch.no_grad():
             y_test_pred = net(X_test)
             y_test_pred = torch.squeeze(y_test_pred)
             test_loss = criterion(y_test_pred, y_test)
         test_acc = calculate_accuracy(y_test, y_test_pred)
         print(
 f'''epoch {epoch}
 Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
 Test  set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}
 ''')
     # Clear the previous gradients before back-propagating this epoch's loss.
     optimizer.zero_grad()
     train_loss.backward()
     optimizer.step()
 # In[129]:
 # torch.save(net, 'model.pth')
 # In[130]:
 # net = torch.load('model.pth')
 # In[131]:
 # Position 0 names the negative class and position 1 the positive class.
 classes = ['Male', 'Female']
 # Threshold the network output at 0.5 and flatten it to one flag per sample.
 y_pred = (net(X_test) >= .5).view(-1).cpu()
 y_test = y_test.cpu()
 print(classification_report(y_test, y_pred, target_names=classes))
 # In[132]:
 # Write one predicted class name per line, in test-set order.
 with open('test_out.csv', 'w') as file:
     for y in y_pred:
         file.write(classes[y.item()] + '\n')
--- a/dvc.Jenkinsfile
+++ b/dvc.Jenkinsfile
@ -0,0 +1,17 @@
 // Declarative pipeline: runs inside an agent built from the repository's
 // Dockerfile, checks out the project and pulls the DVC-tracked files over SSH.
 pipeline {
     agent {
         dockerfile true
     }
     stages {
         // NOTE(review): the stage name mentions "reproduce", but no `dvc repro`
         // step is executed below - confirm whether one is intended.
         stage('Dvc pull and reproduce') {
             steps {
                 checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
                 // Bind the stored SFTP password to the IUM_SFTP_PASS environment variable.
                 withCredentials([string(credentialsId: 'ium-sftp-password', variable: 'IUM_SFTP_PASS')]) {
                     // -f: the committed .dvc/config already defines ium_ssh_remote,
                     // and `dvc remote add` refuses to redefine an existing remote without it.
                     sh 'dvc remote add -d -f ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp'
                     // Must reference the variable bound above (IUM_SFTP_PASS). The single-quoted
                     // Groovy string leaves expansion to the shell, so the secret is not
                     // interpolated into the command by Groovy.
                     sh 'dvc remote modify --local ium_ssh_remote password "$IUM_SFTP_PASS"'
                     sh 'dvc pull'
                 }
             }
         }
     }
 }
--- a/dvc.yaml
+++ b/dvc.yaml
@ -0,0 +1,19 @@
 stages:
  prepare_datasets:
    cmd: python prepare_datasets.py X_train.csv X_test.csv y_train.csv y_test.csv
    deps:
    - data/bodyPerformance.csv
    - prepare_datasets.py
    outs:
    - X_test.csv
    - X_train.csv
    - y_test.csv
    - y_train.csv
  train:
    cmd: python train.py model.pth
    deps:
    - X_train.csv
    - train.py
    - y_train.csv
    outs:
    - model.pth
--- a/prepare_datasets.py
+++ b/prepare_datasets.py
@ -4,7 +4,7 @@
 # In[ ]:
-get_ipython().system('unzip -o body-performance-data.zip')
+# get_ipython().system('unzip -o body-performance-data.zip')
 # In[4]:
@ -17,7 +17,7 @@ from sklearn.model_selection import train_test_split
 # In[21]:
-df = pd.read_csv('bodyPerformance.csv')
+df = pd.read_csv('data/bodyPerformance.csv')
 # In[22]:
--- a/train.py
+++ b/train.py
@ -0,0 +1,94 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[ ]:
 import numpy as np
 import pandas as pd
 import torch
 from torch import nn, optim
 import torch.nn.functional as F
 import sys
 # In[ ]:
 # Read the training features and labels from CSV files in the working directory.
 X_train, y_train = pd.read_csv('X_train.csv'), pd.read_csv('y_train.csv')
 # In[ ]:
 # Convert both frames to float32 tensors; squeeze removes size-1 dimensions
 # from the label tensor.
 X_train = torch.from_numpy(np.array(X_train)).to(torch.float32)
 y_train = torch.from_numpy(y_train.values).to(torch.float32).squeeze()
 # In[ ]:
 class Net(nn.Module):
     """Small feed-forward network with layer widths n_features -> 5 -> 3 -> 1."""
     def __init__(self, n_features):
         """Register the linear layers fc1, fc2 and fc3 for `n_features` inputs."""
         super().__init__()
         self.fc1 = nn.Linear(n_features, 5)
         self.fc2 = nn.Linear(5, 3)
         self.fc3 = nn.Linear(3, 1)
     def forward(self, x):
         """Return sigmoid(fc3(relu(fc2(relu(fc1(x))))))."""
         activations = self.fc1(x).relu()
         activations = self.fc2(activations).relu()
         return self.fc3(activations).sigmoid()
 # In[ ]:
 # One network input per feature column of X_train.
 net = Net(X_train.shape[1])
 criterion = nn.BCELoss()
 optimizer = optim.Adam(net.parameters(), lr=0.001)
 # In[ ]:
 # Prefer the first CUDA device; fall back to the CPU when CUDA is unavailable.
 device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
 X_train, y_train = X_train.to(device), y_train.to(device)
 net, criterion = net.to(device), criterion.to(device)
 # In[ ]:
 def calculate_accuracy(y_true, y_pred):
     """Compute accuracy of `y_pred` against `y_true` as a 0-dimensional float tensor.
     Predictions are flattened and count as positive when they are >= 0.5.
     """
     flags = torch.ge(y_pred, .5).view(-1)
     matches = torch.eq(y_true, flags)
     return matches.sum().float() / len(y_true)
 def round_tensor(t, decimal_places=3):
     """Extract the Python number from one-element tensor `t` and round it."""
     value = t.item()
     return round(value, decimal_places)
 # Full-batch training: each of the 1000 epochs runs one forward and one
 # backward pass over all of X_train.
 for epoch in range(1000):
     y_pred = net(X_train).squeeze()
     train_loss = criterion(y_pred, y_train)
     # Report training loss and accuracy on every 100th epoch.
     if not epoch % 100:
         train_acc = calculate_accuracy(y_train, y_pred)
         print(f'''epoch {epoch}
            Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
            ''')
     # Reset gradients, back-propagate the loss, then update the weights.
     optimizer.zero_grad()
     train_loss.backward()
     optimizer.step()
 # In[ ]:
 # dvc.yaml runs this script as `python train.py model.pth`, so honour a
 # positional output path when one is given; with no argument (or an
 # option-like first argument) keep the previous fixed name 'model.pth'.
 model_path = sys.argv[1] if len(sys.argv) > 1 and not sys.argv[1].startswith('-') else 'model.pth'
 # The whole module object is serialized, not only its state_dict.
 torch.save(net, model_path)