30 changed files with 3490 additions and 475 deletions
--- a/.dvc/.gitignore
+++ b/.dvc/.gitignore
@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
--- a/.dvc/config
+++ b/.dvc/config
@ -0,0 +1,6 @@
+[core]
+    remote = ium_ssh_remote
+['remote "my_local_remote"']
+    url = /dvcstore
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.dvcignore
+++ b/.dvcignore
@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,19 @@
+# ---> JupyterNotebooks
+# gitignore template for Jupyter Notebooks
+# website: http://jupyter.org/
+
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+
+# IPython
+profile_default/
+ipython_config.py
+
+# Remove previous ipynb_checkpoints
+#   git rm -r .ipynb_checkpoints/
+
+/X_train.csv
+/X_test.csv
+/y_train.csv
+/y_test.csv
+/model.pth
--- a/14
+++ b/14
@ -1,4 +1,5 @@
 FROM ubuntu:latest
+
 RUN apt update && apt install -y python3-pip
 RUN pip3 install pandas
 RUN pip3 install sklearn
@ -6,11 +7,14 @@ RUN pip3 install seaborn
 RUN pip3 install ipython
 RUN pip3 install torch
 RUN pip3 install numpy
-RUN pip3 install mlflow
+RUN pip3 install dvc
+# Quote the extras specifier so the shell can never treat the brackets as a glob pattern.
+RUN pip3 install 'dvc[ssh]' paramiko
+# -y answers apt's confirmation prompt, which cannot be answered during a non-interactive image build.
+RUN apt-get install -y unzip

 WORKDIR /app

-COPY ./training.py ./
-COPY ./training_mlflow.py ./
-COPY ./evaluation.py ./
-COPY ./predict_444501.py ./
+
+COPY ./body-performance-data.zip ./
+COPY ./prepare_datasets.py ./
+COPY ./train.py ./
+
--- a/18
+++ b/18
@ -0,0 +1,18 @@
+// Declarative pipeline: generates the train/test CSV splits and archives them.
+pipeline {
+    // Run the build inside a container built from the Dockerfile in the repository.
+    agent {
+    	dockerfile true
+    }
+    stages {
+		stage('Check out from version control') {
+			steps {
+				// Check out the master branch of the project repository with the stored 's444421' credentials.
+				checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
+			}
+		}
+        stage('Shell Script') {
+            steps {
+				// Run the dataset preparation script, then keep the four CSV splits as build artifacts.
+				sh 'ipython ./prepare_datasets.py'
+				archiveArtifacts artifacts: 'X_train.csv, X_test.csv, y_train.csv, y_test.csv '
+            }
+        }
+    }
+}
--- a/18
+++ b/18
@ -0,0 +1,18 @@
+// Declarative pipeline: generates the train/test CSV splits and archives them.
+pipeline {
+    // Run the build inside the prebuilt 'agakul/ium:4.0' Docker image.
+    agent {
+    	docker {image 'agakul/ium:4.0'}
+    }
+    stages {
+		stage('Check out from version control') {
+			steps {
+				// Check out the master branch of the project repository with the stored 's444421' credentials.
+				checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
+			}
+		}
+        stage('Shell Script') {
+            steps {
+				// Run the dataset preparation script, then keep the four CSV splits as build artifacts.
+				sh 'ipython ./prepare_datasets.py'
+				archiveArtifacts artifacts: 'X_train.csv, X_test.csv, y_train.csv, y_test.csv '
+            }
+        }
+    }
+}
--- a/12
+++ b/12
@ -1,12 +0,0 @@
-name: s444421
-    
-docker_env:
-  image: agakul/ium:mlflow
-
-entry_points:
-  main:
-    parameters:
-      epochs: {type: float, default: 1000}
-    command: "python training_mlflow.py {epochs}"
-  test:
-    command: "python evaluation.py"
--- a/README.md
+++ b/README.md
@ -0,0 +1,2 @@
+# ium_444421
+
--- a/body-performance-data.zip
+++ b/body-performance-data.zip
--- a/classification_net.ipynb
+++ b/classification_net.ipynb
@ -0,0 +1,531 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "forty-fault",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!kaggle datasets download -d kukuroo3/body-performance-data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "pediatric-tuesday",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!unzip -o body-performance-data.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "id": "interstate-presence",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import classification_report\n",
+    "import torch\n",
+    "from torch import nn, optim\n",
+    "import torch.nn.functional as F"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "id": "structural-trigger",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(13393, 12)"
+      ]
+     },
+     "execution_count": 115,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = pd.read_csv('bodyPerformance.csv')\n",
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 116,
+   "id": "turkish-category",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>height_cm</th>\n",
+       "      <th>weight_kg</th>\n",
+       "      <th>body fat_%</th>\n",
+       "      <th>diastolic</th>\n",
+       "      <th>systolic</th>\n",
+       "      <th>gripForce</th>\n",
+       "      <th>sit and bend forward_cm</th>\n",
+       "      <th>sit-ups counts</th>\n",
+       "      <th>broad jump_cm</th>\n",
+       "      <th>class</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>27.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>172.3</td>\n",
+       "      <td>75.24</td>\n",
+       "      <td>21.3</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>54.9</td>\n",
+       "      <td>18.4</td>\n",
+       "      <td>60.0</td>\n",
+       "      <td>217.0</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>25.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>165.0</td>\n",
+       "      <td>55.80</td>\n",
+       "      <td>15.7</td>\n",
+       "      <td>77.0</td>\n",
+       "      <td>126.0</td>\n",
+       "      <td>36.4</td>\n",
+       "      <td>16.3</td>\n",
+       "      <td>53.0</td>\n",
+       "      <td>229.0</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>31.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>179.6</td>\n",
+       "      <td>78.00</td>\n",
+       "      <td>20.1</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>152.0</td>\n",
+       "      <td>44.8</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>49.0</td>\n",
+       "      <td>181.0</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>32.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>174.5</td>\n",
+       "      <td>71.10</td>\n",
+       "      <td>18.4</td>\n",
+       "      <td>76.0</td>\n",
+       "      <td>147.0</td>\n",
+       "      <td>41.4</td>\n",
+       "      <td>15.2</td>\n",
+       "      <td>53.0</td>\n",
+       "      <td>219.0</td>\n",
+       "      <td>B</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>28.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>173.8</td>\n",
+       "      <td>67.70</td>\n",
+       "      <td>17.1</td>\n",
+       "      <td>70.0</td>\n",
+       "      <td>127.0</td>\n",
+       "      <td>43.5</td>\n",
+       "      <td>27.1</td>\n",
+       "      <td>45.0</td>\n",
+       "      <td>217.0</td>\n",
+       "      <td>B</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    age gender  height_cm  weight_kg  body fat_%  diastolic  systolic  \\\n",
+       "0  27.0      M      172.3      75.24        21.3       80.0     130.0   \n",
+       "1  25.0      M      165.0      55.80        15.7       77.0     126.0   \n",
+       "2  31.0      M      179.6      78.00        20.1       92.0     152.0   \n",
+       "3  32.0      M      174.5      71.10        18.4       76.0     147.0   \n",
+       "4  28.0      M      173.8      67.70        17.1       70.0     127.0   \n",
+       "\n",
+       "   gripForce  sit and bend forward_cm  sit-ups counts  broad jump_cm class  \n",
+       "0       54.9                     18.4            60.0          217.0     C  \n",
+       "1       36.4                     16.3            53.0          229.0     A  \n",
+       "2       44.8                     12.0            49.0          181.0     C  \n",
+       "3       41.4                     15.2            53.0          219.0     B  \n",
+       "4       43.5                     27.1            45.0          217.0     B  "
+      ]
+     },
+     "execution_count": 116,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 117,
+   "id": "received-absence",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']\n",
+    "df = df[cols]\n",
+    "\n",
+    "# male - 0, female - 1\n",
+    "df['gender'].replace({'M': 0, 'F': 1}, inplace = True)\n",
+    "df = df.dropna(how='any')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 118,
+   "id": "excited-parent",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    0.632196\n",
+       "1    0.367804\n",
+       "Name: gender, dtype: float64"
+      ]
+     },
+     "execution_count": 118,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.gender.value_counts() / df.shape[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "id": "extended-cinema",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = df[['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']]\n",
+    "y = df[['gender']]\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "id": "animated-farming",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([10714, 5]) torch.Size([10714])\n",
+      "torch.Size([2679, 5]) torch.Size([2679])\n"
+     ]
+    }
+   ],
+   "source": [
+    "X_train = torch.from_numpy(np.array(X_train)).float()\n",
+    "y_train = torch.squeeze(torch.from_numpy(y_train.values).float())\n",
+    "\n",
+    "X_test = torch.from_numpy(np.array(X_test)).float()\n",
+    "y_test = torch.squeeze(torch.from_numpy(y_test.values).float())\n",
+    "\n",
+    "print(X_train.shape, y_train.shape)\n",
+    "print(X_test.shape, y_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 121,
+   "id": "technical-wallet",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Net(nn.Module):\n",
+    "  def __init__(self, n_features):\n",
+    "    super(Net, self).__init__()\n",
+    "    self.fc1 = nn.Linear(n_features, 5)\n",
+    "    self.fc2 = nn.Linear(5, 3)\n",
+    "    self.fc3 = nn.Linear(3, 1)\n",
+    "  def forward(self, x):\n",
+    "    x = F.relu(self.fc1(x))\n",
+    "    x = F.relu(self.fc2(x))\n",
+    "    return torch.sigmoid(self.fc3(x))\n",
+    "net = Net(X_train.shape[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "id": "requested-plymouth",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "criterion = nn.BCELoss()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "id": "iraqi-english",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimizer = optim.Adam(net.parameters(), lr=0.001)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "id": "emerging-helmet",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "id": "differential-aviation",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train = X_train.to(device)\n",
+    "y_train = y_train.to(device)\n",
+    "X_test = X_test.to(device)\n",
+    "y_test = y_test.to(device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "id": "ranging-calgary",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "net = net.to(device)\n",
+    "criterion = criterion.to(device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "id": "iraqi-blanket",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def calculate_accuracy(y_true, y_pred):\n",
+    "  predicted = y_pred.ge(.5).view(-1)\n",
+    "  return (y_true == predicted).sum().float() / len(y_true)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "id": "robust-serbia",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "epoch 0\n",
+      "Train set - loss: 1.005, accuracy: 0.37\n",
+      "Test  set - loss: 1.018, accuracy: 0.358\n",
+      "\n",
+      "epoch 100\n",
+      "Train set - loss: 0.677, accuracy: 0.743\n",
+      "Test  set - loss: 0.679, accuracy: 0.727\n",
+      "\n",
+      "epoch 200\n",
+      "Train set - loss: 0.636, accuracy: 0.79\n",
+      "Test  set - loss: 0.64, accuracy: 0.778\n",
+      "\n",
+      "epoch 300\n",
+      "Train set - loss: 0.568, accuracy: 0.839\n",
+      "Test  set - loss: 0.577, accuracy: 0.833\n",
+      "\n",
+      "epoch 400\n",
+      "Train set - loss: 0.504, accuracy: 0.885\n",
+      "Test  set - loss: 0.514, accuracy: 0.877\n",
+      "\n",
+      "epoch 500\n",
+      "Train set - loss: 0.441, accuracy: 0.922\n",
+      "Test  set - loss: 0.45, accuracy: 0.913\n",
+      "\n",
+      "epoch 600\n",
+      "Train set - loss: 0.388, accuracy: 0.944\n",
+      "Test  set - loss: 0.396, accuracy: 0.938\n",
+      "\n",
+      "epoch 700\n",
+      "Train set - loss: 0.353, accuracy: 0.954\n",
+      "Test  set - loss: 0.359, accuracy: 0.949\n",
+      "\n",
+      "epoch 800\n",
+      "Train set - loss: 0.327, accuracy: 0.958\n",
+      "Test  set - loss: 0.333, accuracy: 0.953\n",
+      "\n",
+      "epoch 900\n",
+      "Train set - loss: 0.306, accuracy: 0.961\n",
+      "Test  set - loss: 0.312, accuracy: 0.955\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "def round_tensor(t, decimal_places=3):\n",
+    "  return round(t.item(), decimal_places)\n",
+    "for epoch in range(1000):\n",
+    "    y_pred = net(X_train)\n",
+    "    y_pred = torch.squeeze(y_pred)\n",
+    "    train_loss = criterion(y_pred, y_train)\n",
+    "    if epoch % 100 == 0:\n",
+    "      train_acc = calculate_accuracy(y_train, y_pred)\n",
+    "      y_test_pred = net(X_test)\n",
+    "      y_test_pred = torch.squeeze(y_test_pred)\n",
+    "      test_loss = criterion(y_test_pred, y_test)\n",
+    "      test_acc = calculate_accuracy(y_test, y_test_pred)\n",
+    "      print(\n",
+    "f'''epoch {epoch}\n",
+    "Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}\n",
+    "Test  set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}\n",
+    "''')\n",
+    "    optimizer.zero_grad()\n",
+    "    train_loss.backward()\n",
+    "    optimizer.step()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 129,
+   "id": "optimum-excerpt",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# torch.save(net, 'model.pth')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 130,
+   "id": "dental-seating",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# net = torch.load('model.pth')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 131,
+   "id": "german-satisfaction",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "        Male       0.97      0.96      0.96      1720\n",
+      "      Female       0.93      0.94      0.94       959\n",
+      "\n",
+      "    accuracy                           0.95      2679\n",
+      "   macro avg       0.95      0.95      0.95      2679\n",
+      "weighted avg       0.95      0.95      0.95      2679\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "classes = ['Male', 'Female']\n",
+    "y_pred = net(X_test)\n",
+    "y_pred = y_pred.ge(.5).view(-1).cpu()\n",
+    "y_test = y_test.cpu()\n",
+    "print(classification_report(y_test, y_pred, target_names=classes))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "id": "british-incidence",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('test_out.csv', 'w') as file:\n",
+    "    for y in y_pred:\n",
+    "        file.write(classes[y.item()])\n",
+    "        file.write('\\n')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/data/.gitignore
+++ b/data/.gitignore
@ -0,0 +1 @@
+/bodyPerformance.csv
--- a/data/bodyPerformance.csv.dvc
+++ b/data/bodyPerformance.csv.dvc
@ -0,0 +1,4 @@
+outs:
+- md5: 6d7c3e3d110fac2ade9d8bce60238208
+  size: 761835
+  path: bodyPerformance.csv
--- a/download_data.sh
+++ b/download_data.sh
@ -0,0 +1,3 @@
+#kaggle datasets download -d tejashvi14/travel-insurance-prediction-data
+# Extract the dataset archive, overwriting files left by a previous run.
+unzip -o travel-insurance-prediction-data.zip
+# Keep only the first CUTOFF lines of the CSV. Abort with a clear message when
+# CUTOFF is unset or empty; unquoted, an empty value made head read the file
+# name as the line count.
+head -n "${CUTOFF:?CUTOFF must be set to the number of lines to keep}" TravelInsurancePrediction.csv > travel_insurance_data.txt
--- a/dvc.Jenkinsfile
+++ b/dvc.Jenkinsfile
@ -0,0 +1,17 @@
+// Declarative pipeline: pulls the DVC-tracked data over SSH and reproduces the DVC stages.
+pipeline {
+    // Run the build inside a container built from the Dockerfile in the repository.
+    agent {
+    	dockerfile true
+    }
+    stages {
+		stage('Dvc pull and reproduce') {
+			steps {
+				// Check out the master branch of the project repository with the stored 's444421' credentials.
+				checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
+// Bind the stored SSH private key; the key file location is exposed as IUM_SFTP_KEY inside the block.
+withCredentials(
+    [sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: 'USER')]) {
+                // Register the key file for the ium_ssh_remote remote in the local DVC config only.
+                sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
+                // Download the tracked data, then run the stages defined for DVC.
+                sh 'dvc pull'
+                sh 'dvc repro'}
+			}
+		}
+    }
+}
--- a/dvc.yaml
+++ b/dvc.yaml
@ -0,0 +1,10 @@
+# DVC pipeline definition: one stage builds the dataset splits, one runs training.
+stages:
+  prepare_datasets:
+    cmd: python3 prepare_datasets.py 
+    deps:
+    - data/bodyPerformance.csv
+    - prepare_datasets.py
+  # NOTE(review): no stage declares outs and train lists only its own script as
+  # a dependency, so nothing here links train to the files prepare_datasets
+  # produces - confirm whether the split CSVs should be declared as outs/deps.
+  train:
+    cmd: python3 train.py
+    deps:
+    - train.py
--- a/environment.yml
+++ b/environment.yml
@ -0,0 +1,124 @@
+name: s444421
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - alembic=1.7.7=pyhd8ed1ab_0
+  - appdirs=1.4.4=pyh9f0ad1d_0
+  - asn1crypto=1.5.1=pyhd8ed1ab_0
+  - blas=1.0=mkl
+  - bottleneck=1.3.4=py39hce1f21e_0
+  - brotlipy=0.7.0=py39hb9d737c_1004
+  - ca-certificates=2022.5.18.1=ha878542_0
+  - certifi=2022.5.18.1=py39hf3d152e_0
+  - cffi=1.15.0=py39hd667e15_1
+  - charset-normalizer=2.0.12=pyhd8ed1ab_0
+  - click=8.1.3=py39hf3d152e_0
+  - cloudpickle=2.1.0=pyhd8ed1ab_0
+  - configparser=5.2.0=pyhd8ed1ab_0
+  - cryptography=37.0.2=py39hd97740a_0
+  - cycler=0.11.0=pyhd8ed1ab_0
+  - databricks-cli=0.12.1=pyhd8ed1ab_0
+  - docker-py=5.0.3=py39hf3d152e_2
+  - docker-pycreds=0.4.0=py_0
+  - entrypoints=0.4=pyhd8ed1ab_0
+  - flask=2.1.2=pyhd8ed1ab_1
+  - freetype=2.10.4=h0708190_1
+  - future=0.18.2=py39hf3d152e_5
+  - gitdb=4.0.9=pyhd8ed1ab_0
+  - gitpython=3.1.27=pyhd8ed1ab_0
+  - greenlet=1.1.2=py39h5a03fae_2
+  - gunicorn=20.1.0=py39hf3d152e_2
+  - idna=3.3=pyhd8ed1ab_0
+  - importlib-metadata=4.11.3=py39hf3d152e_1
+  - importlib_resources=5.7.1=pyhd8ed1ab_1
+  - intel-openmp=2021.4.0=h06a4308_3561
+  - itsdangerous=2.1.2=pyhd8ed1ab_0
+  - jinja2=3.1.2=pyhd8ed1ab_0
+  - joblib=1.1.0=pyhd8ed1ab_0
+  - jpeg=9e=h166bdaf_1
+  - kiwisolver=1.4.2=py39hf939315_1
+  - lcms2=2.12=hddcbb42_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libblas=3.9.0=12_linux64_mkl
+  - libcblas=3.9.0=12_linux64_mkl
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=11.2.0=h1234567_0
+  - libgfortran-ng=12.1.0=h69a702a_16
+  - libgfortran5=12.1.0=hdcd56e2_16
+  - libgomp=11.2.0=h1234567_0
+  - liblapack=3.9.0=12_linux64_mkl
+  - libpng=1.6.37=h21135ba_2
+  - libprotobuf=3.19.1=h4ff587b_0
+  - libstdcxx-ng=11.2.0=h1234567_0
+  - libtiff=4.2.0=h85742a9_0
+  - libwebp-base=1.2.2=h7f98852_1
+  - lz4-c=1.9.3=h9c3ff4c_1
+  - mako=1.2.0=pyhd8ed1ab_1
+  - markupsafe=2.1.1=py39hb9d737c_1
+  - matplotlib-base=3.4.3=py39h2fa2bec_2
+  - mkl=2021.4.0=h06a4308_640
+  - mkl-service=2.4.0=py39h7f8727e_0
+  - mkl_fft=1.3.1=py39hd3c417c_0
+  - mkl_random=1.2.2=py39h51133e4_0
+  - mlflow=1.26.0=py39ha39b057_0
+  - ncurses=6.3=h7f8727e_2
+  - ninja=1.11.0=h924138e_0
+  - numexpr=2.8.1=py39h6abb31d_0
+  - numpy=1.22.3=py39he7a7128_0
+  - numpy-base=1.22.3=py39hf524024_0
+  - olefile=0.46=pyh9f0ad1d_1
+  - openssl=1.1.1o=h166bdaf_0
+  - packaging=21.3=pyhd3eb1b0_0
+  - pandas=1.4.2=py39h295c915_0
+  - patsy=0.5.2=pyhd8ed1ab_0
+  - pillow=7.2.0=py39h6f3857e_2
+  - pip=21.2.4=py39h06a4308_0
+  - prometheus_client=0.14.1=pyhd8ed1ab_0
+  - prometheus_flask_exporter=0.20.1=pyhd8ed1ab_0
+  - protobuf=3.19.1=py39h295c915_0
+  - pycparser=2.21=pyhd8ed1ab_0
+  - pyopenssl=22.0.0=pyhd8ed1ab_0
+  - pyparsing=3.0.4=pyhd3eb1b0_0
+  - pysocks=1.7.1=py39hf3d152e_5
+  - python=3.9.12=h12debd9_0
+  - python-dateutil=2.8.2=pyhd3eb1b0_0
+  - python_abi=3.9=2_cp39
+  - pytorch=1.10.0=cpu_py39hc70245e_1
+  - pytz=2021.3=pyhd3eb1b0_0
+  - pyyaml=6.0=py39hb9d737c_4
+  - querystring_parser=1.2.4=py_0
+  - readline=8.1.2=h7f8727e_1
+  - requests=2.27.1=pyhd8ed1ab_0
+  - scikit-learn=1.1.1=py39h4037b75_0
+  - scipy=1.8.0=py39hee8e79c_1
+  - seaborn=0.11.2=hd8ed1ab_0
+  - seaborn-base=0.11.2=pyhd8ed1ab_0
+  - setuptools=61.2.0=py39h06a4308_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sleef=3.5.1=h9b69904_2
+  - smmap=3.0.5=pyh44b312d_0
+  - sqlalchemy=1.4.36=py39hb9d737c_0
+  - sqlite=3.38.3=hc218d9a_0
+  - sqlparse=0.4.2=pyhd8ed1ab_0
+  - statsmodels=0.13.2=py39hce5d2b2_0
+  - tabulate=0.8.9=pyhd8ed1ab_0
+  - tenacity=8.0.1=pyhd8ed1ab_0
+  - threadpoolctl=3.1.0=pyh8a188c0_0
+  - tk=8.6.11=h1ccaba5_1
+  - tornado=6.1=py39hb9d737c_3
+  - typing_extensions=4.2.0=pyha770c72_1
+  - tzdata=2022a=hda174b7_0
+  - urllib3=1.26.9=pyhd8ed1ab_0
+  - websocket-client=1.3.2=pyhd8ed1ab_0
+  - werkzeug=2.1.2=pyhd8ed1ab_1
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - xz=5.2.5=h7f8727e_1
+  - yaml=0.2.5=h7f98852_2
+  - zipp=3.8.0=pyhd8ed1ab_0
+  - zlib=1.2.12=h7f8727e_2
+  - zstd=1.4.9=ha95c52a_0
+prefix: /home/agata/anaconda3/envs/s444421
+
--- a/evaluation.Jenkinsfile
+++ b/evaluation.Jenkinsfile
@ -1,41 +0,0 @@
-def ACC = ''
-
-pipeline {
-    agent {
-    	dockerfile true
-    }
-    	parameters {
-    			gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training_and_evaluation', name: 'BRANCH', type: 'PT_BRANCH'
-    			buildSelector(
-			defaultSelector: lastSuccessful(),
-			description: 'Which build to use for copying artifacts',
-			name: 'BUILD_SELECTOR'
-		)
-	}
-    stages {
-        stage('Stage 1') {
-            steps {
-            	git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git'
-            	copyArtifacts filter: '*', projectName:'s444421-create-dataset', selector: buildParameter('BUILD_SELECTOR')
-            	copyArtifacts filter: '*', projectName:'s444421-training/${BRANCH}/', selector: buildParameter('BUILD_SELECTOR')
-            	copyArtifacts filter: '*', projectName:'s444421-evaluation/training_and_evaluation', optional: true
-            	sh 'ipython ./evaluation.py'
-            	archiveArtifacts artifacts: 'build_accuracy.txt, bilds_accuracy.jpg'
-            }
-        }
-    }
-    post {
-    	success {
-    		emailext body: 'SUCCESS', subject: 's444421-evaluation status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
-      		}
-      	failure {
-      		emailext body: 'FAILURE', subject: 's444421-evaluation status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
-      	}
-      	aborted {
-      		emailext body: 'ABORTED', subject: 's444421-evaluation status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
-      	}
-      	changed {
-      		emailext body: 'CHANGED', subject: 's444421-evaluation status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
-      	}
-    }
-}
--- a/evaluation.py
+++ b/evaluation.py
@ -1,89 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# In[ ]:
-
-
-import numpy as np
-import pandas as pd
-from sklearn.metrics import accuracy_score
-import torch
-from torch import nn, optim
-import torch.nn.functional as F
-import matplotlib.pyplot as plt
-
-
-# In[ ]:
-
-
-class Net(nn.Module):
-    def __init__(self, n_features):
-        super(Net, self).__init__()
-        self.fc1 = nn.Linear(n_features, 5)
-        self.fc2 = nn.Linear(5, 3)
-        self.fc3 = nn.Linear(3, 1)
-    def forward(self, x):
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        return torch.sigmoid(self.fc3(x))
-
-
-# In[ ]:
-
-
-X_test = pd.read_csv('X_test.csv')
-y_test = pd.read_csv('y_test.csv')
-
-
-# In[ ]:
-
-
-X_test = torch.from_numpy(np.array(X_test)).float()
-y_test = torch.squeeze(torch.from_numpy(y_test.values).float())
-
-
-# In[ ]:
-
-
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-X_test = X_test.to(device)
-y_test = y_test.to(device)
-
-
-# In[ ]:
-
-
-net = torch.load('model.pth')
-
-
-# In[ ]:
-
-
-y_pred = net(X_test)
-y_pred = y_pred.ge(.5).view(-1).cpu()
-y_test = y_test.cpu()
-
-
-# In[ ]:
-
-
-accuracy = accuracy_score(y_test, y_pred)
-with open('build_accuracy.txt', 'a') as file:
-    file.write(str(accuracy))
-    file.write('\n')
-
-
-# In[ ]:
-
-
-with open('build_accuracy.txt') as file:
-    acc = [float(line.rstrip()) for line in file]
-    
-builds = list(range(1, len(acc) + 1))
-
-plt.xlabel('build')
-plt.ylabel('accuracy')
-plt.plot(builds, acc, 'ro')
-plt.show()
-plt.savefig('bilds_accuracy.jpg')
-
--- a/get_stats.sh
+++ b/get_stats.sh
@ -0,0 +1 @@
+wc -l travel_insurance_data.txt > stats.txt
--- a/predict_444501.Jenkinsfile
+++ b/predict_444501.Jenkinsfile
@ -1,31 +0,0 @@
-pipeline {
-    agent {
-    	dockerfile {
-            filename 'Dockerfile'}
-        }
-        parameters {
-    		buildSelector(
-			defaultSelector: lastSuccessful(),
-			description: 'Which build to use for copying artifacts',
-			name: 'BUILD_SELECTOR'
-            )
-            string(
-			defaultValue: '{\\"inputs\\": [[167.39999389648438, 72.18000030517578, 40.0, 21.0, 94.0], [162.3000030517578, 67.30000305175781, 18.0, 52.0, 219.0], [178.5, 90.5, 14.699999809265137, 45.0, 262.0], [180.89999389648438, 77.0999984741211, 25.399999618530273, 43.0, 224.0], [177.3000030517578, 88.4800033569336, 35.599998474121094, 18.0, 183.0]]}',
-            description: 'Inputs',
-            name: 'INPUT'
-		)
-        }
-    stages {
-		stage('Copy artifacts') {
-			steps {
-                copyArtifacts fingerprintArtifacts: true, projectName: 's444421-training/training_and_evaluation', selector: buildParameter('BUILD_SELECTOR')
-			}
-		}
-        stage('Predict') {
-            steps {
-                sh "echo ${params.INPUT} > input_example.json"
-				sh "ipython ./predict_444501.py"
-            }
-        }
-    }
-}
--- a/predict_444501.py
+++ b/predict_444501.py
@ -1,9 +0,0 @@
-import mlflow
-import numpy as np
-
-model = mlflow.pyfunc.load_model('mlruns/1/e435ee5c0c5a468c99eb43c13df4a94b/artifacts/s444421')
-
-with open('input_example.json') as f:
-    input = json.load(f)
-    y_predicted = model.predict(np.array([data['inputs']]).reshape(-1, 2))
-    print(y_predicted[:5])
--- a/preparation.ipynb
+++ b/preparation.ipynb
--- a/preparation.py
+++ b/preparation.py
@ -0,0 +1,104 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[ ]:
+
+
+# get_ipython().system('kaggle datasets download -d tejashvi14/travel-insurance-prediction-data')
+
+
+# In[ ]:
+
+
+get_ipython().system('unzip -o travel-insurance-prediction-data.zip')
+
+
+# In[5]:
+
+
+import pandas as pd
+# Load the dataset, using the first CSV column as the row index.
+travel_insurance=pd.read_csv('TravelInsurancePrediction.csv', index_col=0)
+# Bare expression kept from the notebook export, where it displayed the frame.
+travel_insurance
+
+
+# In[ ]:
+
+
+# usunięcie wierszy zawierających braki
+travel_insurance.dropna(axis='index', how='any')
+
+
+# In[6]:
+
+
+# normalizacja danych
+for column in travel_insurance.columns:
+    if travel_insurance[column].dtype == 'object':
+        travel_insurance[column] = travel_insurance[column].str.lower()
+
+travel_insurance
+
+
+# In[8]:
+
+
+# podział na podzbiory train/dev/test
+import sklearn
+from sklearn.model_selection import train_test_split
+travel_insurance_train, travel_insurance_rest = sklearn.model_selection.train_test_split(travel_insurance, test_size=0.4, random_state=1)
+travel_insurance_test, travel_insurance_dev = sklearn.model_selection.train_test_split(travel_insurance_rest, test_size=0.5, random_state=1)
+
+
+# In[27]:
+
+
+travel_insurance.describe(include='all')
+
+
+# In[23]:
+
+
+# zwracanie informacji o danym zbiorze 
+
+import seaborn as sns
+
+def printInformation(data):
+    print(f'Size (rows): {len(data)}\n')
+    mean_value = data.mean()
+    min_value = data.min(numeric_only=True)
+    max_value = data.max(numeric_only=True)
+    std_value = data.std()
+    median_value = data.median()
+    print(f'(mean)\n{mean_value}', f'(min)\n{min_value}', f'(max)\n{max_value}', f'(std)\n{std_value}', f'(median)\n{median_value}', sep="\n\n")
+    sns.pairplot(data=data, hue="TravelInsurance")
+
+
+# In[24]:
+
+
+printInformation(travel_insurance)
+
+
+# In[11]:
+
+
+printInformation(travel_insurance_train)
+
+
+# In[12]:
+
+
+printInformation(travel_insurance_test)
+
+
+# In[13]:
+
+
+printInformation(travel_insurance_dev)
+
+
+# In[ ]:
+
+
+
+
--- a/prepare_datasets.py
+++ b/prepare_datasets.py
@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[ ]:
+
+
+# get_ipython().system('unzip -o body-performance-data.zip')
+
+
+# In[4]:
+
+
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+
+# In[21]:
+
+
+df = pd.read_csv('data/bodyPerformance.csv')
+
+
+# In[22]:
+
+
+cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']
+df = df[cols]
+
+# male - 0, female - 1
+df['gender'].replace({'M': 0, 'F': 1}, inplace = True)
+df = df.dropna(how='any')
+
+
+# In[23]:
+
+
+X = df[['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']]
+y = df[['gender']]
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+
+# In[24]:
+
+
+X_train.to_csv(r'X_train.csv', index=False)
+X_test.to_csv(r'X_test.csv', index=False)
+y_train.to_csv(r'y_train.csv', index=False)
+y_test.to_csv(r'y_test.csv', index=False)
+
--- a/training.py
+++ b/training.py
@ -15,12 +15,6 @@ import sys
 # In[ ]:


-epochs = int(sys.argv[1])
-
-
-# In[ ]:
-
-
 X_train = pd.read_csv('X_train.csv')
 y_train = pd.read_csv('y_train.csv')

@ -78,7 +72,7 @@ def round_tensor(t, decimal_places=3):
    return round(t.item(), decimal_places)


-for epoch in range(epochs):
+for epoch in range(1000):
    y_pred = net(X_train)
    y_pred = torch.squeeze(y_pred)
    train_loss = criterion(y_pred, y_train)
--- a/training.Jenkinsfile
+++ b/training.Jenkinsfile
@ -1,37 +0,0 @@
-pipeline {
-    agent {
-	    dockerfile {
-	        filename 'Dockerfile'
-	        args '-v /mlruns:/mlruns'
-	    }
-    }
-        options {
-    		copyArtifactPermission('s444421-predict-s444501');
-    	}
-    	parameters {
-    			buildSelector(
-			defaultSelector: lastSuccessful(),
-			description: 'Which build to use for copying artifacts',
-			name: 'BUILD_SELECTOR'
-		)
-		string(
-			defaultValue: '1000',
-            		description: 'Number of epochs',
-            		name: 'EPOCHS'
-		)
-	}
-    stages {
-		stage('Check out from version control') {
-			steps {
-				checkout([$class: 'GitSCM', branches: [[name: '*/training_and_evaluation']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
-			}
-		}
-        stage('Training') {
-            steps {
-            	copyArtifacts filter: '*', projectName:'s444421-create-dataset', selector: buildParameter('BUILD_SELECTOR')
-            	sh 'ipython ./training_mlflow.py $EPOCHS'
-            	archiveArtifacts artifacts: 'mlruns/**'
-            }
-        }
-    }
-}
--- a/training_mlflow.py
+++ b/training_mlflow.py
@ -1,131 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# In[ ]:
-
-
-import numpy as np
-import pandas as pd
-from sklearn.metrics import accuracy_score
-import torch
-from torch import nn, optim
-import torch.nn.functional as F
-import sys
-import mlflow
-from urllib.parse import urlparse
-
-
-# In[ ]:
-
-mlflow.set_tracking_uri("http://172.17.0.1:5000")
-mlflow.set_experiment("s444421")
-
-
-# In[ ]:
-
-
-epochs = int(sys.argv[1])
-
-
-# In[ ]:
-
-
-def prepare_data():
-    X_train = pd.read_csv('X_train.csv')
-    y_train = pd.read_csv('y_train.csv')
-    X_train = torch.from_numpy(np.array(X_train)).float()
-    y_train = torch.squeeze(torch.from_numpy(y_train.values).float())
-    return X_train, y_train
-
-
-# In[ ]:
-
-
-class Net(nn.Module):
-    def __init__(self, n_features):
-        super(Net, self).__init__()
-        self.fc1 = nn.Linear(n_features, 5)
-        self.fc2 = nn.Linear(5, 3)
-        self.fc3 = nn.Linear(3, 1)
-    def forward(self, x):
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        return torch.sigmoid(self.fc3(x))
-
-
-# In[ ]:
-
-
-def calculate_accuracy(y_true, y_pred):
-    predicted = y_pred.ge(.5).view(-1)
-    return (y_true == predicted).sum().float() / len(y_true)
-
-
-# In[ ]:
-
-
-def round_tensor(t, decimal_places=3):
-    return round(t.item(), decimal_places)
-
-# In[ ]:
-
-
-def train_model(X_train, y_train, device, epochs):
-    net = Net(X_train.shape[1])
-    criterion = nn.BCELoss()
-    optimizer = optim.Adam(net.parameters(), lr=0.001)
-    
-    X_train = X_train.to(device)
-    y_train = y_train.to(device)
-    
-    net = net.to(device)
-    criterion = criterion.to(device)
-
-    for epoch in range(epochs):
-        y_pred = net(X_train)
-        y_pred = torch.squeeze(y_pred)
-        train_loss = criterion(y_pred, y_train)
-        if epoch % 100 == 0:
-            train_acc = calculate_accuracy(y_train, y_pred)
-            print(
-                f'''epoch {epoch}
-                Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
-                ''')
-    optimizer.zero_grad()
-    train_loss.backward()
-    optimizer.step()
-    return net, round_tensor(train_loss)
-
-
-# In[ ]:
-
-def my_main(epochs):
-    X_train, y_train = prepare_data()
-    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-    
-    model, loss = train_model(X_train, y_train, device, epochs)
-    torch.save(model, 'model.pth')
-    mlflow.log_param("epochs", epochs)
-    mlflow.log_metric("loss", loss)
-    
-    X_test = pd.read_csv('X_test.csv')
-    X_test = torch.from_numpy(np.array(X_test)).float()
-    X_test = X_test.to(device)
-    y_pred = model(X_test)
-    y_pred = y_pred.ge(.5).view(-1).cpu()
-    
-    signature = mlflow.models.signature.infer_signature(X_train.numpy(), np.array(y_pred))
-    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
-    
-    if tracking_url_type_store != "file":
-        mlflow.sklearn.log_model(model, "my_model", registered_model_name="s444421", signature=signature, input_example=X_test.numpy()[:5])
-    else:
-        mlflow.sklearn.log_model(model, "my_model", signature=signature, input_example=X_test.numpy()[:5])
-
-
-# In[ ]:
-
-with mlflow.start_run() as run:
-    print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
-    print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
-    my_main(epochs)
--- a/training_sacred.py
+++ b/training_sacred.py
@ -1,113 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# In[ ]:
-
-
-import numpy as np
-import pandas as pd
-from sklearn.metrics import accuracy_score
-import torch
-from torch import nn, optim
-import torch.nn.functional as F
-import sys
-from sacred import Experiment
-from sacred.observers import FileStorageObserver, MongoObserver
-
-
-# In[ ]:
-
-
-ex = Experiment(save_git_info=False)
-ex.observers.append(FileStorageObserver('my_runs'))
-ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))
-
-@ex.config
-def my_config():
-    epochs = 400
-
-
-# In[ ]:
-
-
-def prepare_data():
-    X_train = pd.read_csv('X_train.csv')
-    y_train = pd.read_csv('y_train.csv')
-    X_train = torch.from_numpy(np.array(X_train)).float()
-    y_train = torch.squeeze(torch.from_numpy(y_train.values).float())
-    return X_train, y_train
-
-
-# In[ ]:
-
-
-class Net(nn.Module):
-    def __init__(self, n_features):
-        super(Net, self).__init__()
-        self.fc1 = nn.Linear(n_features, 5)
-        self.fc2 = nn.Linear(5, 3)
-        self.fc3 = nn.Linear(3, 1)
-    def forward(self, x):
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        return torch.sigmoid(self.fc3(x))
-
-
-# In[ ]:
-
-
-def calculate_accuracy(y_true, y_pred):
-    predicted = y_pred.ge(.5).view(-1)
-    return (y_true == predicted).sum().float() / len(y_true)
-
-
-# In[ ]:
-
-
-def round_tensor(t, decimal_places=3):
-    return round(t.item(), decimal_places)
-
-# In[ ]:
-
-
-def train_model(X_train, y_train, device, epochs):
-    net = Net(X_train.shape[1])
-    criterion = nn.BCELoss()
-    optimizer = optim.Adam(net.parameters(), lr=0.001)
-    
-    X_train = X_train.to(device)
-    y_train = y_train.to(device)
-    
-    net = net.to(device)
-    criterion = criterion.to(device)
-
-    for epoch in range(epochs):
-        y_pred = net(X_train)
-        y_pred = torch.squeeze(y_pred)
-        train_loss = criterion(y_pred, y_train)
-        if epoch % 100 == 0:
-            train_acc = calculate_accuracy(y_train, y_pred)
-            print(
-                f'''epoch {epoch}
-                Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
-                ''')
-    optimizer.zero_grad()
-    train_loss.backward()
-    optimizer.step()
-    return net, round_tensor(train_loss)
-
-
-# In[ ]:
-
-
-@ex.automain
-def my_main(epochs, _run):
-    X_train, y_train = prepare_data()
-    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-    
-    model, loss = train_model(X_train, y_train, device, epochs)
-    torch.save(model, 'model.pth')
-    ex.add_artifact('model.pth')
-    
-    _run.info["epochs"] = epochs
-    _run.info["loss"] = loss
--- a/travel-insurance-prediction-data.zip
+++ b/travel-insurance-prediction-data.zip