Update files

2022-05-29 13:46:40 +02:00 · 2022-05-29 13:46:40 +02:00 · e8af09d8a7
commit e8af09d8a7
parent 4905e68ac6
8 changed files with 145 additions and 195 deletions
--- a/.dvc/config
+++ b/.dvc/config
@ -0,0 +1,6 @@
+[core]
+    remote = ium_ssh_remote
+['remote "my_local_remote"']
+    url = /dvcstore
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.gitignore
+++ b/.gitignore
@ -12,3 +12,8 @@ ipython_config.py
 # Remove previous ipynb_checkpoints
 #   git rm -r .ipynb_checkpoints/

+/X_train.csv
+/X_test.csv
+/y_train.csv
+/y_test.csv
+/model.pth
--- a/3
+++ b/3
@ -13,5 +13,6 @@ WORKDIR /app


 COPY ./body-performance-data.zip ./
-COPY ./classification_net.py ./
+COPY ./prepare_datasets.py ./
+COPY ./train.py ./

--- a/classification_net.py
+++ b/classification_net.py
@ -1,192 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# In[ ]:
-
-
-# get_ipython().system('kaggle datasets download -d kukuroo3/body-performance-data')
-
-
-# In[ ]:
-
-
-get_ipython().system('unzip -o body-performance-data.zip')
-
-
-# In[114]:
-
-
-import numpy as np
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import classification_report
-import torch
-from torch import nn, optim
-import torch.nn.functional as F
-
-
-# In[115]:
-
-
-df = pd.read_csv('bodyPerformance.csv')
-df.shape
-
-
-# In[116]:
-
-
-df.head()
-
-
-# In[117]:
-
-
-cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']
-df = df[cols]
-
-# male - 0, female - 1
-df['gender'].replace({'M': 0, 'F': 1}, inplace = True)
-df = df.dropna(how='any')
-
-
-# In[118]:
-
-
-df.gender.value_counts() / df.shape[0]
-
-
-# In[119]:
-
-
-X = df[['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']]
-y = df[['gender']]
-
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-
-
-# In[120]:
-
-
-X_train = torch.from_numpy(np.array(X_train)).float()
-y_train = torch.squeeze(torch.from_numpy(y_train.values).float())
-
-X_test = torch.from_numpy(np.array(X_test)).float()
-y_test = torch.squeeze(torch.from_numpy(y_test.values).float())
-
-print(X_train.shape, y_train.shape)
-print(X_test.shape, y_test.shape)
-
-
-# In[121]:
-
-
-class Net(nn.Module):
-  def __init__(self, n_features):
-    super(Net, self).__init__()
-    self.fc1 = nn.Linear(n_features, 5)
-    self.fc2 = nn.Linear(5, 3)
-    self.fc3 = nn.Linear(3, 1)
-  def forward(self, x):
-    x = F.relu(self.fc1(x))
-    x = F.relu(self.fc2(x))
-    return torch.sigmoid(self.fc3(x))
-net = Net(X_train.shape[1])
-
-
-# In[122]:
-
-
-criterion = nn.BCELoss()
-
-
-# In[123]:
-
-
-optimizer = optim.Adam(net.parameters(), lr=0.001)
-
-
-# In[124]:
-
-
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
-
-# In[125]:
-
-
-X_train = X_train.to(device)
-y_train = y_train.to(device)
-X_test = X_test.to(device)
-y_test = y_test.to(device)
-
-
-# In[126]:
-
-
-net = net.to(device)
-criterion = criterion.to(device)
-
-
-# In[127]:
-
-
-def calculate_accuracy(y_true, y_pred):
-  predicted = y_pred.ge(.5).view(-1)
-  return (y_true == predicted).sum().float() / len(y_true)
-
-
-# In[128]:
-
-
-def round_tensor(t, decimal_places=3):
-  return round(t.item(), decimal_places)
-for epoch in range(1000):
-    y_pred = net(X_train)
-    y_pred = torch.squeeze(y_pred)
-    train_loss = criterion(y_pred, y_train)
-    if epoch % 100 == 0:
-      train_acc = calculate_accuracy(y_train, y_pred)
-      y_test_pred = net(X_test)
-      y_test_pred = torch.squeeze(y_test_pred)
-      test_loss = criterion(y_test_pred, y_test)
-      test_acc = calculate_accuracy(y_test, y_test_pred)
-      print(
-f'''epoch {epoch}
-Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
-Test  set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}
-''')
-    optimizer.zero_grad()
-    train_loss.backward()
-    optimizer.step()
-
-
-# In[129]:
-
-
-# torch.save(net, 'model.pth')
-
-
-# In[130]:
-
-
-# net = torch.load('model.pth')
-
-
-# In[131]:
-
-
-classes = ['Male', 'Female']
-y_pred = net(X_test)
-y_pred = y_pred.ge(.5).view(-1).cpu()
-y_test = y_test.cpu()
-print(classification_report(y_test, y_pred, target_names=classes))
-
-
-# In[132]:
-
-
-with open('test_out.csv', 'w') as file:
-    for y in y_pred:
-        file.write(classes[y.item()])
-        file.write('\n')
-
--- a/dvc.Jenkinsfile
+++ b/dvc.Jenkinsfile
@ -0,0 +1,17 @@
+pipeline {
+    agent {
+        dockerfile true
+    }
+    stages {
+        stage('Dvc pull and reproduce') {
+            steps {
+                checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
+                withCredentials([string(credentialsId: 'ium-sftp-password', variable: 'IUM_SFTP_PASS')]) {  // secret is exposed to the steps below as $IUM_SFTP_PASS
+                    sh 'dvc remote add -d -f ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp'  // -f: .dvc/config already declares ium_ssh_remote, so overwrite instead of failing
+                    sh 'dvc remote modify --local ium_ssh_remote password $IUM_SFTP_PASS'  // must be the variable bound above; single quotes leave expansion to the shell
+                    sh 'dvc pull'
+                }
+            }
+        }
+    }
+}
--- a/dvc.yaml
+++ b/dvc.yaml
@ -0,0 +1,19 @@
+stages:
+  prepare_datasets:
+    cmd: python prepare_datasets.py X_train.csv X_test.csv y_train.csv y_test.csv
+    deps:
+    - data/bodyPerformance.csv
+    - prepare_datasets.py
+    outs:
+    - X_test.csv
+    - X_train.csv
+    - y_test.csv
+    - y_train.csv
+  train:
+    cmd: python train.py model.pth
+    deps:
+    - X_train.csv
+    - train.py
+    - y_train.csv
+    outs:
+    - model.pth
--- a/prepare_datasets.py
+++ b/prepare_datasets.py
@ -4,7 +4,7 @@
 # In[ ]:


-get_ipython().system('unzip -o body-performance-data.zip')
+# get_ipython().system('unzip -o body-performance-data.zip')


 # In[4]:
@ -17,7 +17,7 @@ from sklearn.model_selection import train_test_split
 # In[21]:


-df = pd.read_csv('bodyPerformance.csv')
+df = pd.read_csv('data/bodyPerformance.csv')


 # In[22]:
--- a/train.py
+++ b/train.py
@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[ ]:
+
+
+import numpy as np
+import pandas as pd
+import torch
+from torch import nn, optim
+import torch.nn.functional as F
+import sys
+
+
+# In[ ]:
+
+
+X_train = pd.read_csv('X_train.csv')
+y_train = pd.read_csv('y_train.csv')
+
+
+# In[ ]:
+
+
+X_train = torch.from_numpy(np.array(X_train)).float()
+y_train = torch.squeeze(torch.from_numpy(y_train.values).float())
+
+
+# In[ ]:
+
+
+class Net(nn.Module):  # small fully connected network: n_features -> 5 -> 3 -> 1
+    def __init__(self, n_features):  # n_features: number of input columns fed to the first layer
+        super(Net, self).__init__()
+        self.fc1 = nn.Linear(n_features, 5)
+        self.fc2 = nn.Linear(5, 3)
+        self.fc3 = nn.Linear(3, 1)
+    def forward(self, x):  # ReLU after the two hidden layers, sigmoid on the output layer
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        return torch.sigmoid(self.fc3(x))  # value in (0, 1); this script trains it with nn.BCELoss
+
+
+# In[ ]:
+
+
+net = Net(X_train.shape[1])
+criterion = nn.BCELoss()
+optimizer = optim.Adam(net.parameters(), lr=0.001)
+
+
+# In[ ]:
+
+
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+X_train = X_train.to(device)
+y_train = y_train.to(device)
+
+net = net.to(device)
+criterion = criterion.to(device)
+
+
+# In[ ]:
+
+
+def calculate_accuracy(y_true, y_pred):  # fraction of entries where the thresholded prediction equals y_true
+    predicted = y_pred.ge(.5).view(-1)  # True where the prediction is >= 0.5, flattened to 1-D
+    return (y_true == predicted).sum().float() / len(y_true)  # matches divided by the number of labels
+
+def round_tensor(t, decimal_places=3):  # turn a single-value tensor into a rounded Python number for printing
+    return round(t.item(), decimal_places)  # .item() extracts the scalar before rounding
+
+
+for epoch in range(1000):
+    y_pred = net(X_train)
+    y_pred = torch.squeeze(y_pred)
+    train_loss = criterion(y_pred, y_train)
+    if epoch % 100 == 0:
+        train_acc = calculate_accuracy(y_train, y_pred)
+        print(
+            f'''epoch {epoch}
+            Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
+            ''')
+    optimizer.zero_grad()
+    train_loss.backward()
+    optimizer.step()
+
+
+# In[ ]:
+
+
+torch.save(net, sys.argv[1] if len(sys.argv) > 1 else 'model.pth')  # output path comes from the CLI (dvc.yaml runs `python train.py model.pth`); falls back to the previous hard-coded name
+