Compare commits
No commits in common. "training_and_evaluation" and "master" have entirely different histories.
training_a
...
master
3
.dvc/.gitignore
vendored
Normal file
3
.dvc/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
/config.local
|
||||||
|
/tmp
|
||||||
|
/cache
|
6
.dvc/config
Normal file
6
.dvc/config
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
[core]
|
||||||
|
remote = ium_ssh_remote
|
||||||
|
['remote "my_local_remote"']
|
||||||
|
url = /dvcstore
|
||||||
|
['remote "ium_ssh_remote"']
|
||||||
|
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
|
3
.dvcignore
Normal file
3
.dvcignore
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Add patterns of files dvc should ignore, which could improve
|
||||||
|
# the performance. Learn more at
|
||||||
|
# https://dvc.org/doc/user-guide/dvcignore
|
19
.gitignore
vendored
Normal file
19
.gitignore
vendored
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# ---> JupyterNotebooks
|
||||||
|
# gitignore template for Jupyter Notebooks
|
||||||
|
# website: http://jupyter.org/
|
||||||
|
|
||||||
|
.ipynb_checkpoints
|
||||||
|
*/.ipynb_checkpoints/*
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# Remove previous ipynb_checkpoints
|
||||||
|
# git rm -r .ipynb_checkpoints/
|
||||||
|
|
||||||
|
/X_train.csv
|
||||||
|
/X_test.csv
|
||||||
|
/y_train.csv
|
||||||
|
/y_test.csv
|
||||||
|
/model.pth
|
14
Dockerfile
14
Dockerfile
@ -1,4 +1,5 @@
|
|||||||
FROM ubuntu:latest
|
FROM ubuntu:latest
|
||||||
|
|
||||||
RUN apt update && apt install -y python3-pip
|
RUN apt update && apt install -y python3-pip
|
||||||
RUN pip3 install pandas
|
RUN pip3 install pandas
|
||||||
RUN pip3 install sklearn
|
RUN pip3 install sklearn
|
||||||
@ -6,11 +7,14 @@ RUN pip3 install seaborn
|
|||||||
RUN pip3 install ipython
|
RUN pip3 install ipython
|
||||||
RUN pip3 install torch
|
RUN pip3 install torch
|
||||||
RUN pip3 install numpy
|
RUN pip3 install numpy
|
||||||
RUN pip3 install mlflow
|
RUN pip3 install dvc
|
||||||
|
RUN pip3 install dvc[ssh] paramiko
|
||||||
|
RUN apt-get install unzip
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY ./training.py ./
|
|
||||||
COPY ./training_mlflow.py ./
|
COPY ./body-performance-data.zip ./
|
||||||
COPY ./evaluation.py ./
|
COPY ./prepare_datasets.py ./
|
||||||
COPY ./predict_444501.py ./
|
COPY ./train.py ./
|
||||||
|
|
||||||
|
18
Jenkinsfile
vendored
Normal file
18
Jenkinsfile
vendored
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
pipeline {
|
||||||
|
agent {
|
||||||
|
dockerfile true
|
||||||
|
}
|
||||||
|
stages {
|
||||||
|
stage('Check out from version control') {
|
||||||
|
steps {
|
||||||
|
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Shell Script') {
|
||||||
|
steps {
|
||||||
|
sh 'ipython ./prepare_datasets.py'
|
||||||
|
archiveArtifacts artifacts: 'X_train.csv, X_test.csv, y_train.csv, y_test.csv '
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
18
Jenkinsfile2
Normal file
18
Jenkinsfile2
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
pipeline {
|
||||||
|
agent {
|
||||||
|
docker {image 'agakul/ium:4.0'}
|
||||||
|
}
|
||||||
|
stages {
|
||||||
|
stage('Check out from version control') {
|
||||||
|
steps {
|
||||||
|
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Shell Script') {
|
||||||
|
steps {
|
||||||
|
sh 'ipython ./prepare_datasets.py'
|
||||||
|
archiveArtifacts artifacts: 'X_train.csv, X_test.csv, y_train.csv, y_test.csv '
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
12
MLproject
12
MLproject
@ -1,12 +0,0 @@
|
|||||||
name: s444421
|
|
||||||
|
|
||||||
docker_env:
|
|
||||||
image: agakul/ium:mlflow
|
|
||||||
|
|
||||||
entry_points:
|
|
||||||
main:
|
|
||||||
parameters:
|
|
||||||
epochs: {type: float, default: 1000}
|
|
||||||
command: "python training_mlflow.py {epochs}"
|
|
||||||
test:
|
|
||||||
command: "python evaluation.py"
|
|
BIN
body-performance-data.zip
Normal file
BIN
body-performance-data.zip
Normal file
Binary file not shown.
531
classification_net.ipynb
Normal file
531
classification_net.ipynb
Normal file
@ -0,0 +1,531 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "forty-fault",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!kaggle datasets download -d kukuroo3/body-performance-data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "pediatric-tuesday",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!unzip -o body-performance-data.zip"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 114,
|
||||||
|
"id": "interstate-presence",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"from sklearn.metrics import classification_report\n",
|
||||||
|
"import torch\n",
|
||||||
|
"from torch import nn, optim\n",
|
||||||
|
"import torch.nn.functional as F"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 115,
|
||||||
|
"id": "structural-trigger",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"(13393, 12)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 115,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df = pd.read_csv('bodyPerformance.csv')\n",
|
||||||
|
"df.shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 116,
|
||||||
|
"id": "turkish-category",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>age</th>\n",
|
||||||
|
" <th>gender</th>\n",
|
||||||
|
" <th>height_cm</th>\n",
|
||||||
|
" <th>weight_kg</th>\n",
|
||||||
|
" <th>body fat_%</th>\n",
|
||||||
|
" <th>diastolic</th>\n",
|
||||||
|
" <th>systolic</th>\n",
|
||||||
|
" <th>gripForce</th>\n",
|
||||||
|
" <th>sit and bend forward_cm</th>\n",
|
||||||
|
" <th>sit-ups counts</th>\n",
|
||||||
|
" <th>broad jump_cm</th>\n",
|
||||||
|
" <th>class</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>27.0</td>\n",
|
||||||
|
" <td>M</td>\n",
|
||||||
|
" <td>172.3</td>\n",
|
||||||
|
" <td>75.24</td>\n",
|
||||||
|
" <td>21.3</td>\n",
|
||||||
|
" <td>80.0</td>\n",
|
||||||
|
" <td>130.0</td>\n",
|
||||||
|
" <td>54.9</td>\n",
|
||||||
|
" <td>18.4</td>\n",
|
||||||
|
" <td>60.0</td>\n",
|
||||||
|
" <td>217.0</td>\n",
|
||||||
|
" <td>C</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>25.0</td>\n",
|
||||||
|
" <td>M</td>\n",
|
||||||
|
" <td>165.0</td>\n",
|
||||||
|
" <td>55.80</td>\n",
|
||||||
|
" <td>15.7</td>\n",
|
||||||
|
" <td>77.0</td>\n",
|
||||||
|
" <td>126.0</td>\n",
|
||||||
|
" <td>36.4</td>\n",
|
||||||
|
" <td>16.3</td>\n",
|
||||||
|
" <td>53.0</td>\n",
|
||||||
|
" <td>229.0</td>\n",
|
||||||
|
" <td>A</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>31.0</td>\n",
|
||||||
|
" <td>M</td>\n",
|
||||||
|
" <td>179.6</td>\n",
|
||||||
|
" <td>78.00</td>\n",
|
||||||
|
" <td>20.1</td>\n",
|
||||||
|
" <td>92.0</td>\n",
|
||||||
|
" <td>152.0</td>\n",
|
||||||
|
" <td>44.8</td>\n",
|
||||||
|
" <td>12.0</td>\n",
|
||||||
|
" <td>49.0</td>\n",
|
||||||
|
" <td>181.0</td>\n",
|
||||||
|
" <td>C</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>32.0</td>\n",
|
||||||
|
" <td>M</td>\n",
|
||||||
|
" <td>174.5</td>\n",
|
||||||
|
" <td>71.10</td>\n",
|
||||||
|
" <td>18.4</td>\n",
|
||||||
|
" <td>76.0</td>\n",
|
||||||
|
" <td>147.0</td>\n",
|
||||||
|
" <td>41.4</td>\n",
|
||||||
|
" <td>15.2</td>\n",
|
||||||
|
" <td>53.0</td>\n",
|
||||||
|
" <td>219.0</td>\n",
|
||||||
|
" <td>B</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>28.0</td>\n",
|
||||||
|
" <td>M</td>\n",
|
||||||
|
" <td>173.8</td>\n",
|
||||||
|
" <td>67.70</td>\n",
|
||||||
|
" <td>17.1</td>\n",
|
||||||
|
" <td>70.0</td>\n",
|
||||||
|
" <td>127.0</td>\n",
|
||||||
|
" <td>43.5</td>\n",
|
||||||
|
" <td>27.1</td>\n",
|
||||||
|
" <td>45.0</td>\n",
|
||||||
|
" <td>217.0</td>\n",
|
||||||
|
" <td>B</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" age gender height_cm weight_kg body fat_% diastolic systolic \\\n",
|
||||||
|
"0 27.0 M 172.3 75.24 21.3 80.0 130.0 \n",
|
||||||
|
"1 25.0 M 165.0 55.80 15.7 77.0 126.0 \n",
|
||||||
|
"2 31.0 M 179.6 78.00 20.1 92.0 152.0 \n",
|
||||||
|
"3 32.0 M 174.5 71.10 18.4 76.0 147.0 \n",
|
||||||
|
"4 28.0 M 173.8 67.70 17.1 70.0 127.0 \n",
|
||||||
|
"\n",
|
||||||
|
" gripForce sit and bend forward_cm sit-ups counts broad jump_cm class \n",
|
||||||
|
"0 54.9 18.4 60.0 217.0 C \n",
|
||||||
|
"1 36.4 16.3 53.0 229.0 A \n",
|
||||||
|
"2 44.8 12.0 49.0 181.0 C \n",
|
||||||
|
"3 41.4 15.2 53.0 219.0 B \n",
|
||||||
|
"4 43.5 27.1 45.0 217.0 B "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 116,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 117,
|
||||||
|
"id": "received-absence",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']\n",
|
||||||
|
"df = df[cols]\n",
|
||||||
|
"\n",
|
||||||
|
"# male - 0, female - 1\n",
|
||||||
|
"df['gender'].replace({'M': 0, 'F': 1}, inplace = True)\n",
|
||||||
|
"df = df.dropna(how='any')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 118,
|
||||||
|
"id": "excited-parent",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"0 0.632196\n",
|
||||||
|
"1 0.367804\n",
|
||||||
|
"Name: gender, dtype: float64"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 118,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.gender.value_counts() / df.shape[0]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 119,
|
||||||
|
"id": "extended-cinema",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X = df[['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']]\n",
|
||||||
|
"y = df[['gender']]\n",
|
||||||
|
"\n",
|
||||||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 120,
|
||||||
|
"id": "animated-farming",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"torch.Size([10714, 5]) torch.Size([10714])\n",
|
||||||
|
"torch.Size([2679, 5]) torch.Size([2679])\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"X_train = torch.from_numpy(np.array(X_train)).float()\n",
|
||||||
|
"y_train = torch.squeeze(torch.from_numpy(y_train.values).float())\n",
|
||||||
|
"\n",
|
||||||
|
"X_test = torch.from_numpy(np.array(X_test)).float()\n",
|
||||||
|
"y_test = torch.squeeze(torch.from_numpy(y_test.values).float())\n",
|
||||||
|
"\n",
|
||||||
|
"print(X_train.shape, y_train.shape)\n",
|
||||||
|
"print(X_test.shape, y_test.shape)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 121,
|
||||||
|
"id": "technical-wallet",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class Net(nn.Module):\n",
|
||||||
|
" def __init__(self, n_features):\n",
|
||||||
|
" super(Net, self).__init__()\n",
|
||||||
|
" self.fc1 = nn.Linear(n_features, 5)\n",
|
||||||
|
" self.fc2 = nn.Linear(5, 3)\n",
|
||||||
|
" self.fc3 = nn.Linear(3, 1)\n",
|
||||||
|
" def forward(self, x):\n",
|
||||||
|
" x = F.relu(self.fc1(x))\n",
|
||||||
|
" x = F.relu(self.fc2(x))\n",
|
||||||
|
" return torch.sigmoid(self.fc3(x))\n",
|
||||||
|
"net = Net(X_train.shape[1])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 122,
|
||||||
|
"id": "requested-plymouth",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"criterion = nn.BCELoss()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 123,
|
||||||
|
"id": "iraqi-english",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"optimizer = optim.Adam(net.parameters(), lr=0.001)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 124,
|
||||||
|
"id": "emerging-helmet",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 125,
|
||||||
|
"id": "differential-aviation",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X_train = X_train.to(device)\n",
|
||||||
|
"y_train = y_train.to(device)\n",
|
||||||
|
"X_test = X_test.to(device)\n",
|
||||||
|
"y_test = y_test.to(device)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 126,
|
||||||
|
"id": "ranging-calgary",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"net = net.to(device)\n",
|
||||||
|
"criterion = criterion.to(device)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 127,
|
||||||
|
"id": "iraqi-blanket",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def calculate_accuracy(y_true, y_pred):\n",
|
||||||
|
" predicted = y_pred.ge(.5).view(-1)\n",
|
||||||
|
" return (y_true == predicted).sum().float() / len(y_true)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 128,
|
||||||
|
"id": "robust-serbia",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"epoch 0\n",
|
||||||
|
"Train set - loss: 1.005, accuracy: 0.37\n",
|
||||||
|
"Test set - loss: 1.018, accuracy: 0.358\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 100\n",
|
||||||
|
"Train set - loss: 0.677, accuracy: 0.743\n",
|
||||||
|
"Test set - loss: 0.679, accuracy: 0.727\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 200\n",
|
||||||
|
"Train set - loss: 0.636, accuracy: 0.79\n",
|
||||||
|
"Test set - loss: 0.64, accuracy: 0.778\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 300\n",
|
||||||
|
"Train set - loss: 0.568, accuracy: 0.839\n",
|
||||||
|
"Test set - loss: 0.577, accuracy: 0.833\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 400\n",
|
||||||
|
"Train set - loss: 0.504, accuracy: 0.885\n",
|
||||||
|
"Test set - loss: 0.514, accuracy: 0.877\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 500\n",
|
||||||
|
"Train set - loss: 0.441, accuracy: 0.922\n",
|
||||||
|
"Test set - loss: 0.45, accuracy: 0.913\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 600\n",
|
||||||
|
"Train set - loss: 0.388, accuracy: 0.944\n",
|
||||||
|
"Test set - loss: 0.396, accuracy: 0.938\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 700\n",
|
||||||
|
"Train set - loss: 0.353, accuracy: 0.954\n",
|
||||||
|
"Test set - loss: 0.359, accuracy: 0.949\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 800\n",
|
||||||
|
"Train set - loss: 0.327, accuracy: 0.958\n",
|
||||||
|
"Test set - loss: 0.333, accuracy: 0.953\n",
|
||||||
|
"\n",
|
||||||
|
"epoch 900\n",
|
||||||
|
"Train set - loss: 0.306, accuracy: 0.961\n",
|
||||||
|
"Test set - loss: 0.312, accuracy: 0.955\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"def round_tensor(t, decimal_places=3):\n",
|
||||||
|
" return round(t.item(), decimal_places)\n",
|
||||||
|
"for epoch in range(1000):\n",
|
||||||
|
" y_pred = net(X_train)\n",
|
||||||
|
" y_pred = torch.squeeze(y_pred)\n",
|
||||||
|
" train_loss = criterion(y_pred, y_train)\n",
|
||||||
|
" if epoch % 100 == 0:\n",
|
||||||
|
" train_acc = calculate_accuracy(y_train, y_pred)\n",
|
||||||
|
" y_test_pred = net(X_test)\n",
|
||||||
|
" y_test_pred = torch.squeeze(y_test_pred)\n",
|
||||||
|
" test_loss = criterion(y_test_pred, y_test)\n",
|
||||||
|
" test_acc = calculate_accuracy(y_test, y_test_pred)\n",
|
||||||
|
" print(\n",
|
||||||
|
"f'''epoch {epoch}\n",
|
||||||
|
"Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}\n",
|
||||||
|
"Test set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}\n",
|
||||||
|
"''')\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" train_loss.backward()\n",
|
||||||
|
" optimizer.step()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 129,
|
||||||
|
"id": "optimum-excerpt",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# torch.save(net, 'model.pth')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 130,
|
||||||
|
"id": "dental-seating",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# net = torch.load('model.pth')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 131,
|
||||||
|
"id": "german-satisfaction",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" precision recall f1-score support\n",
|
||||||
|
"\n",
|
||||||
|
" Male 0.97 0.96 0.96 1720\n",
|
||||||
|
" Female 0.93 0.94 0.94 959\n",
|
||||||
|
"\n",
|
||||||
|
" accuracy 0.95 2679\n",
|
||||||
|
" macro avg 0.95 0.95 0.95 2679\n",
|
||||||
|
"weighted avg 0.95 0.95 0.95 2679\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"classes = ['Male', 'Female']\n",
|
||||||
|
"y_pred = net(X_test)\n",
|
||||||
|
"y_pred = y_pred.ge(.5).view(-1).cpu()\n",
|
||||||
|
"y_test = y_test.cpu()\n",
|
||||||
|
"print(classification_report(y_test, y_pred, target_names=classes))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 132,
|
||||||
|
"id": "british-incidence",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"with open('test_out.csv', 'w') as file:\n",
|
||||||
|
" for y in y_pred:\n",
|
||||||
|
" file.write(classes[y.item()])\n",
|
||||||
|
" file.write('\\n')"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
1
data/.gitignore
vendored
Normal file
1
data/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
/bodyPerformance.csv
|
4
data/bodyPerformance.csv.dvc
Normal file
4
data/bodyPerformance.csv.dvc
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
outs:
|
||||||
|
- md5: 6d7c3e3d110fac2ade9d8bce60238208
|
||||||
|
size: 761835
|
||||||
|
path: bodyPerformance.csv
|
3
download_data.sh
Normal file
3
download_data.sh
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Unpack the previously downloaded Kaggle archive and keep only the first
# $CUTOFF lines of the CSV in travel_insurance_data.txt.
# The download itself is left disabled, as in the original script.
#kaggle datasets download -d tejashvi14/travel-insurance-prediction-data
unzip -o travel-insurance-prediction-data.zip
# Quote CUTOFF and fail with a clear message when it is unset or empty;
# otherwise `head -n` would take the CSV file name as its line count.
head -n "${CUTOFF:?CUTOFF must be set to the number of lines to keep}" TravelInsurancePrediction.csv > travel_insurance_data.txt
|
17
dvc.Jenkinsfile
Normal file
17
dvc.Jenkinsfile
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
pipeline {
|
||||||
|
agent {
|
||||||
|
dockerfile true
|
||||||
|
}
|
||||||
|
stages {
|
||||||
|
stage('Dvc pull and reproduce') {
|
||||||
|
steps {
|
||||||
|
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
|
||||||
|
withCredentials(
|
||||||
|
[sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: 'USER')]) {
|
||||||
|
sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
|
||||||
|
sh 'dvc pull'
|
||||||
|
sh 'dvc repro'}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
10
dvc.yaml
Normal file
10
dvc.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
stages:
|
||||||
|
prepare_datasets:
|
||||||
|
cmd: python3 prepare_datasets.py
|
||||||
|
deps:
|
||||||
|
- data/bodyPerformance.csv
|
||||||
|
- prepare_datasets.py
|
||||||
|
train:
|
||||||
|
cmd: python3 train.py
|
||||||
|
deps:
|
||||||
|
- train.py
|
124
environment.yml
Normal file
124
environment.yml
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
name: s444421
|
||||||
|
channels:
|
||||||
|
- conda-forge
|
||||||
|
- defaults
|
||||||
|
dependencies:
|
||||||
|
- _libgcc_mutex=0.1=main
|
||||||
|
- _openmp_mutex=5.1=1_gnu
|
||||||
|
- alembic=1.7.7=pyhd8ed1ab_0
|
||||||
|
- appdirs=1.4.4=pyh9f0ad1d_0
|
||||||
|
- asn1crypto=1.5.1=pyhd8ed1ab_0
|
||||||
|
- blas=1.0=mkl
|
||||||
|
- bottleneck=1.3.4=py39hce1f21e_0
|
||||||
|
- brotlipy=0.7.0=py39hb9d737c_1004
|
||||||
|
- ca-certificates=2022.5.18.1=ha878542_0
|
||||||
|
- certifi=2022.5.18.1=py39hf3d152e_0
|
||||||
|
- cffi=1.15.0=py39hd667e15_1
|
||||||
|
- charset-normalizer=2.0.12=pyhd8ed1ab_0
|
||||||
|
- click=8.1.3=py39hf3d152e_0
|
||||||
|
- cloudpickle=2.1.0=pyhd8ed1ab_0
|
||||||
|
- configparser=5.2.0=pyhd8ed1ab_0
|
||||||
|
- cryptography=37.0.2=py39hd97740a_0
|
||||||
|
- cycler=0.11.0=pyhd8ed1ab_0
|
||||||
|
- databricks-cli=0.12.1=pyhd8ed1ab_0
|
||||||
|
- docker-py=5.0.3=py39hf3d152e_2
|
||||||
|
- docker-pycreds=0.4.0=py_0
|
||||||
|
- entrypoints=0.4=pyhd8ed1ab_0
|
||||||
|
- flask=2.1.2=pyhd8ed1ab_1
|
||||||
|
- freetype=2.10.4=h0708190_1
|
||||||
|
- future=0.18.2=py39hf3d152e_5
|
||||||
|
- gitdb=4.0.9=pyhd8ed1ab_0
|
||||||
|
- gitpython=3.1.27=pyhd8ed1ab_0
|
||||||
|
- greenlet=1.1.2=py39h5a03fae_2
|
||||||
|
- gunicorn=20.1.0=py39hf3d152e_2
|
||||||
|
- idna=3.3=pyhd8ed1ab_0
|
||||||
|
- importlib-metadata=4.11.3=py39hf3d152e_1
|
||||||
|
- importlib_resources=5.7.1=pyhd8ed1ab_1
|
||||||
|
- intel-openmp=2021.4.0=h06a4308_3561
|
||||||
|
- itsdangerous=2.1.2=pyhd8ed1ab_0
|
||||||
|
- jinja2=3.1.2=pyhd8ed1ab_0
|
||||||
|
- joblib=1.1.0=pyhd8ed1ab_0
|
||||||
|
- jpeg=9e=h166bdaf_1
|
||||||
|
- kiwisolver=1.4.2=py39hf939315_1
|
||||||
|
- lcms2=2.12=hddcbb42_0
|
||||||
|
- ld_impl_linux-64=2.38=h1181459_1
|
||||||
|
- libblas=3.9.0=12_linux64_mkl
|
||||||
|
- libcblas=3.9.0=12_linux64_mkl
|
||||||
|
- libffi=3.3=he6710b0_2
|
||||||
|
- libgcc-ng=11.2.0=h1234567_0
|
||||||
|
- libgfortran-ng=12.1.0=h69a702a_16
|
||||||
|
- libgfortran5=12.1.0=hdcd56e2_16
|
||||||
|
- libgomp=11.2.0=h1234567_0
|
||||||
|
- liblapack=3.9.0=12_linux64_mkl
|
||||||
|
- libpng=1.6.37=h21135ba_2
|
||||||
|
- libprotobuf=3.19.1=h4ff587b_0
|
||||||
|
- libstdcxx-ng=11.2.0=h1234567_0
|
||||||
|
- libtiff=4.2.0=h85742a9_0
|
||||||
|
- libwebp-base=1.2.2=h7f98852_1
|
||||||
|
- lz4-c=1.9.3=h9c3ff4c_1
|
||||||
|
- mako=1.2.0=pyhd8ed1ab_1
|
||||||
|
- markupsafe=2.1.1=py39hb9d737c_1
|
||||||
|
- matplotlib-base=3.4.3=py39h2fa2bec_2
|
||||||
|
- mkl=2021.4.0=h06a4308_640
|
||||||
|
- mkl-service=2.4.0=py39h7f8727e_0
|
||||||
|
- mkl_fft=1.3.1=py39hd3c417c_0
|
||||||
|
- mkl_random=1.2.2=py39h51133e4_0
|
||||||
|
- mlflow=1.26.0=py39ha39b057_0
|
||||||
|
- ncurses=6.3=h7f8727e_2
|
||||||
|
- ninja=1.11.0=h924138e_0
|
||||||
|
- numexpr=2.8.1=py39h6abb31d_0
|
||||||
|
- numpy=1.22.3=py39he7a7128_0
|
||||||
|
- numpy-base=1.22.3=py39hf524024_0
|
||||||
|
- olefile=0.46=pyh9f0ad1d_1
|
||||||
|
- openssl=1.1.1o=h166bdaf_0
|
||||||
|
- packaging=21.3=pyhd3eb1b0_0
|
||||||
|
- pandas=1.4.2=py39h295c915_0
|
||||||
|
- patsy=0.5.2=pyhd8ed1ab_0
|
||||||
|
- pillow=7.2.0=py39h6f3857e_2
|
||||||
|
- pip=21.2.4=py39h06a4308_0
|
||||||
|
- prometheus_client=0.14.1=pyhd8ed1ab_0
|
||||||
|
- prometheus_flask_exporter=0.20.1=pyhd8ed1ab_0
|
||||||
|
- protobuf=3.19.1=py39h295c915_0
|
||||||
|
- pycparser=2.21=pyhd8ed1ab_0
|
||||||
|
- pyopenssl=22.0.0=pyhd8ed1ab_0
|
||||||
|
- pyparsing=3.0.4=pyhd3eb1b0_0
|
||||||
|
- pysocks=1.7.1=py39hf3d152e_5
|
||||||
|
- python=3.9.12=h12debd9_0
|
||||||
|
- python-dateutil=2.8.2=pyhd3eb1b0_0
|
||||||
|
- python_abi=3.9=2_cp39
|
||||||
|
- pytorch=1.10.0=cpu_py39hc70245e_1
|
||||||
|
- pytz=2021.3=pyhd3eb1b0_0
|
||||||
|
- pyyaml=6.0=py39hb9d737c_4
|
||||||
|
- querystring_parser=1.2.4=py_0
|
||||||
|
- readline=8.1.2=h7f8727e_1
|
||||||
|
- requests=2.27.1=pyhd8ed1ab_0
|
||||||
|
- scikit-learn=1.1.1=py39h4037b75_0
|
||||||
|
- scipy=1.8.0=py39hee8e79c_1
|
||||||
|
- seaborn=0.11.2=hd8ed1ab_0
|
||||||
|
- seaborn-base=0.11.2=pyhd8ed1ab_0
|
||||||
|
- setuptools=61.2.0=py39h06a4308_0
|
||||||
|
- six=1.16.0=pyhd3eb1b0_1
|
||||||
|
- sleef=3.5.1=h9b69904_2
|
||||||
|
- smmap=3.0.5=pyh44b312d_0
|
||||||
|
- sqlalchemy=1.4.36=py39hb9d737c_0
|
||||||
|
- sqlite=3.38.3=hc218d9a_0
|
||||||
|
- sqlparse=0.4.2=pyhd8ed1ab_0
|
||||||
|
- statsmodels=0.13.2=py39hce5d2b2_0
|
||||||
|
- tabulate=0.8.9=pyhd8ed1ab_0
|
||||||
|
- tenacity=8.0.1=pyhd8ed1ab_0
|
||||||
|
- threadpoolctl=3.1.0=pyh8a188c0_0
|
||||||
|
- tk=8.6.11=h1ccaba5_1
|
||||||
|
- tornado=6.1=py39hb9d737c_3
|
||||||
|
- typing_extensions=4.2.0=pyha770c72_1
|
||||||
|
- tzdata=2022a=hda174b7_0
|
||||||
|
- urllib3=1.26.9=pyhd8ed1ab_0
|
||||||
|
- websocket-client=1.3.2=pyhd8ed1ab_0
|
||||||
|
- werkzeug=2.1.2=pyhd8ed1ab_1
|
||||||
|
- wheel=0.37.1=pyhd3eb1b0_0
|
||||||
|
- xz=5.2.5=h7f8727e_1
|
||||||
|
- yaml=0.2.5=h7f98852_2
|
||||||
|
- zipp=3.8.0=pyhd8ed1ab_0
|
||||||
|
- zlib=1.2.12=h7f8727e_2
|
||||||
|
- zstd=1.4.9=ha95c52a_0
|
||||||
|
prefix: /home/agata/anaconda3/envs/s444421
|
||||||
|
|
@ -1,41 +0,0 @@
|
|||||||
def ACC = ''
|
|
||||||
|
|
||||||
pipeline {
|
|
||||||
agent {
|
|
||||||
dockerfile true
|
|
||||||
}
|
|
||||||
parameters {
|
|
||||||
gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training_and_evaluation', name: 'BRANCH', type: 'PT_BRANCH'
|
|
||||||
buildSelector(
|
|
||||||
defaultSelector: lastSuccessful(),
|
|
||||||
description: 'Which build to use for copying artifacts',
|
|
||||||
name: 'BUILD_SELECTOR'
|
|
||||||
)
|
|
||||||
}
|
|
||||||
stages {
|
|
||||||
stage('Stage 1') {
|
|
||||||
steps {
|
|
||||||
git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git'
|
|
||||||
copyArtifacts filter: '*', projectName:'s444421-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
|
||||||
copyArtifacts filter: '*', projectName:'s444421-training/${BRANCH}/', selector: buildParameter('BUILD_SELECTOR')
|
|
||||||
copyArtifacts filter: '*', projectName:'s444421-evaluation/training_and_evaluation', optional: true
|
|
||||||
sh 'ipython ./evaluation.py'
|
|
||||||
archiveArtifacts artifacts: 'build_accuracy.txt, bilds_accuracy.jpg'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
post {
|
|
||||||
success {
|
|
||||||
emailext body: 'SUCCESS', subject: 's444421-evaluation status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
|
||||||
}
|
|
||||||
failure {
|
|
||||||
emailext body: 'FAILURE', subject: 's444421-evaluation status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
|
||||||
}
|
|
||||||
aborted {
|
|
||||||
emailext body: 'ABORTED', subject: 's444421-evaluation status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
|
||||||
}
|
|
||||||
changed {
|
|
||||||
emailext body: 'CHANGED', subject: 's444421-evaluation status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,89 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# coding: utf-8
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
from sklearn.metrics import accuracy_score
|
|
||||||
import torch
|
|
||||||
from torch import nn, optim
|
|
||||||
import torch.nn.functional as F
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
class Net(nn.Module):
    """Small fully connected binary classifier.

    Layer widths are ``n_features -> 5 -> 3 -> 1``. The two hidden layers use
    ReLU and the output layer is passed through a sigmoid.
    """

    def __init__(self, n_features):
        """Create the three linear layers for inputs with ``n_features`` columns."""
        super().__init__()
        # Attribute names stay fc1/fc2/fc3 so previously saved models still load.
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return the sigmoid of the last layer's output for ``x``."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return torch.sigmoid(logits)
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
# Evaluate the saved model on the held-out split, append this build's accuracy
# to build_accuracy.txt and plot the accuracy of every recorded build.

# Load the held-out features and labels written by the dataset step.
X_test = pd.read_csv('X_test.csv')
y_test = pd.read_csv('y_test.csv')

# Convert to float tensors; squeeze drops the singleton dimension of the label frame.
X_test = torch.from_numpy(np.array(X_test)).float()
y_test = torch.squeeze(torch.from_numpy(y_test.values).float())

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
X_test = X_test.to(device)
y_test = y_test.to(device)

# NOTE(review): torch.load unpickles arbitrary objects; only load a model.pth
# produced by a trusted build.
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only agent.
net = torch.load('model.pth', map_location=device)
net.eval()

# Inference only, so skip building the autograd graph.
with torch.no_grad():
    y_pred = net(X_test)
# Threshold the sigmoid output at 0.5 to obtain class predictions.
y_pred = y_pred.ge(.5).view(-1).cpu()
y_test = y_test.cpu()

# Append this build's accuracy as one line of the running history.
accuracy = accuracy_score(y_test, y_pred)
with open('build_accuracy.txt', 'a') as file:
    file.write(str(accuracy))
    file.write('\n')

# Read the whole history back, ignoring blank lines that would break float().
with open('build_accuracy.txt') as file:
    acc = [float(line) for line in file if line.strip()]

builds = list(range(1, len(acc) + 1))

plt.xlabel('build')
plt.ylabel('accuracy')
plt.plot(builds, acc, 'ro')
# Save before show(): on interactive backends show() can leave an empty figure
# behind, so the previous order could write a blank image.
# The misspelled file name is kept because the Jenkins pipeline archives
# 'bilds_accuracy.jpg'.
plt.savefig('bilds_accuracy.jpg')
plt.show()
|
|
||||||
|
|
1
get_stats.sh
Normal file
1
get_stats.sh
Normal file
@ -0,0 +1 @@
|
|||||||
|
# Count the lines of travel_insurance_data.txt and store the wc output in stats.txt.
wc -l travel_insurance_data.txt > stats.txt
|
@ -1,31 +0,0 @@
|
|||||||
// Declarative pipeline: copies the artifacts of a training build and runs
// predict_444501.py on a JSON input supplied as a build parameter.
pipeline {
    agent {
        // The agent container is built from the repository's Dockerfile.
        dockerfile {
            filename 'Dockerfile'}
    }
    parameters {
        // Which upstream build to copy artifacts from (default: last successful).
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR'
        )
        // JSON payload written to input_example.json by the Predict stage.
        // The quotes are backslash-escaped because the value is expanded into
        // an unquoted shell command below.
        string(
            defaultValue: '{\\"inputs\\": [[167.39999389648438, 72.18000030517578, 40.0, 21.0, 94.0], [162.3000030517578, 67.30000305175781, 18.0, 52.0, 219.0], [178.5, 90.5, 14.699999809265137, 45.0, 262.0], [180.89999389648438, 77.0999984741211, 25.399999618530273, 43.0, 224.0], [177.3000030517578, 88.4800033569336, 35.599998474121094, 18.0, 183.0]]}',
            description: 'Inputs',
            name: 'INPUT'
        )
    }
    stages {
        stage('Copy artifacts') {
            steps {
                // Fetch the artifacts of the selected training build.
                copyArtifacts fingerprintArtifacts: true, projectName: 's444421-training/training_and_evaluation', selector: buildParameter('BUILD_SELECTOR')
            }
        }
        stage('Predict') {
            steps {
                // NOTE(review): ${params.INPUT} is interpolated by Groovy before
                // the shell runs, so the parameter text is parsed by the shell;
                // this is an injection risk if the parameter can come from an
                // untrusted user - confirm who may trigger this job.
                sh "echo ${params.INPUT} > input_example.json"
                sh "ipython ./predict_444501.py"
            }
        }
    }
}
|
|
@ -1,9 +0,0 @@
|
|||||||
import json

import mlflow
import numpy as np

# Load the logged model through MLflow's generic python-function interface.
model = mlflow.pyfunc.load_model('mlruns/1/e435ee5c0c5a468c99eb43c13df4a94b/artifacts/s444421')

# The pipeline writes the request payload to this file before the script runs.
with open('input_example.json') as f:
    data = json.load(f)

# Keep one row per sample: collapse any extra leading dimensions while
# preserving the per-row feature count found in the payload (the previous
# hard-coded width of 2 did not match the 5-value rows used by the pipeline).
inputs = np.array(data['inputs'])
inputs = inputs.reshape(-1, inputs.shape[-1])

y_predicted = model.predict(inputs)
print(y_predicted[:5])
|
|
2566
preparation.ipynb
Normal file
2566
preparation.ipynb
Normal file
File diff suppressed because one or more lines are too long
104
preparation.py
Normal file
104
preparation.py
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# get_ipython().system('kaggle datasets download -d tejashvi14/travel-insurance-prediction-data')
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import zipfile

# Unpack the dataset archive into the working directory, overwriting files
# from an earlier extraction (the effect of `unzip -o`) without depending on
# IPython or an external unzip binary.
with zipfile.ZipFile('travel-insurance-prediction-data.zip') as archive:
    archive.extractall()


# In[5]:


import pandas as pd
# The first CSV column is used as the row index.
travel_insurance = pd.read_csv('TravelInsurancePrediction.csv', index_col=0)


# In[ ]:


# Remove rows containing missing values. dropna() returns a new frame, so the
# result has to be assigned back for the removal to take effect.
travel_insurance = travel_insurance.dropna(axis='index', how='any')


# In[6]:


# Normalize the data: lower-case every object-dtype (text) column.
for column in travel_insurance.columns:
    if travel_insurance[column].dtype == 'object':
        travel_insurance[column] = travel_insurance[column].str.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# In[8]:
|
||||||
|
|
||||||
|
|
||||||
|
# Split into train/dev/test subsets: 60% for training, then the remaining
# 40% is halved into the test and dev parts.
import sklearn
from sklearn.model_selection import train_test_split

travel_insurance_train, travel_insurance_rest = train_test_split(
    travel_insurance,
    test_size=0.4,
    random_state=1,
)
travel_insurance_test, travel_insurance_dev = train_test_split(
    travel_insurance_rest,
    test_size=0.5,
    random_state=1,
)


# In[27]:


travel_insurance.describe(include='all')
|
||||||
|
|
||||||
|
|
||||||
|
# In[23]:
|
||||||
|
|
||||||
|
|
||||||
|
# report information about the given dataset
|
||||||
|
|
||||||
|
import seaborn as sns
|
||||||
|
|
||||||
|
def printInformation(data):
    """Print the row count and summary statistics of *data* and draw a pair plot.

    All statistics are computed over numeric columns only, so frames that
    also contain text columns do not raise.

    Args:
        data: DataFrame to describe; it must contain a ``TravelInsurance``
            column, which is used to color the pair plot.
    """
    print(f'Size (rows): {len(data)}\n')
    # numeric_only=True on every reduction keeps the five summaries
    # consistent with each other and skips non-numeric columns.
    mean_value = data.mean(numeric_only=True)
    min_value = data.min(numeric_only=True)
    max_value = data.max(numeric_only=True)
    std_value = data.std(numeric_only=True)
    median_value = data.median(numeric_only=True)
    print(f'(mean)\n{mean_value}', f'(min)\n{min_value}', f'(max)\n{max_value}', f'(std)\n{std_value}', f'(median)\n{median_value}', sep="\n\n")
    sns.pairplot(data=data, hue="TravelInsurance")
|
||||||
|
|
||||||
|
|
||||||
|
# In[24]:
|
||||||
|
|
||||||
|
|
||||||
|
# Report on the full dataset first, then on the train, test and dev subsets.
for subset in (
    travel_insurance,
    travel_insurance_train,
    travel_insurance_test,
    travel_insurance_dev,
):
    printInformation(subset)
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
50
prepare_datasets.py
Normal file
50
prepare_datasets.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
# get_ipython().system('unzip -o body-performance-data.zip')
|
||||||
|
|
||||||
|
|
||||||
|
# In[4]:
|
||||||
|
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
|
||||||
|
# In[21]:
|
||||||
|
|
||||||
|
|
||||||
|
df = pd.read_csv('data/bodyPerformance.csv')


# In[22]:


# Keep only the label column and the five features used by the model.
cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']
# Take an explicit copy so the column assignment below modifies this frame.
df = df[cols].copy()

# male - 0, female - 1
# Assign the replaced column back instead of calling replace(inplace=True)
# on the selected column: the chained in-place form is not guaranteed to
# modify df itself.
df['gender'] = df['gender'].replace({'M': 0, 'F': 1})
df = df.dropna(how='any')
|
||||||
|
|
||||||
|
|
||||||
|
# In[23]:
|
||||||
|
|
||||||
|
|
||||||
|
# Features and label are kept as DataFrames (y has a single 'gender' column).
feature_columns = ['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']
X = df[feature_columns]
y = df[['gender']]

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# In[24]:


# Write each part to its own CSV file without the index column.
for frame, path in (
    (X_train, r'X_train.csv'),
    (X_test, r'X_test.csv'),
    (y_train, r'y_train.csv'),
    (y_test, r'y_test.csv'),
):
    frame.to_csv(path, index=False)
|
||||||
|
|
8
training.py → train.py
Executable file → Normal file
8
training.py → train.py
Executable file → Normal file
@ -15,12 +15,6 @@ import sys
|
|||||||
# In[ ]:
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
epochs = int(sys.argv[1])
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
X_train = pd.read_csv('X_train.csv')
|
X_train = pd.read_csv('X_train.csv')
|
||||||
y_train = pd.read_csv('y_train.csv')
|
y_train = pd.read_csv('y_train.csv')
|
||||||
|
|
||||||
@ -78,7 +72,7 @@ def round_tensor(t, decimal_places=3):
|
|||||||
return round(t.item(), decimal_places)
|
return round(t.item(), decimal_places)
|
||||||
|
|
||||||
|
|
||||||
for epoch in range(epochs):
|
for epoch in range(1000):
|
||||||
y_pred = net(X_train)
|
y_pred = net(X_train)
|
||||||
y_pred = torch.squeeze(y_pred)
|
y_pred = torch.squeeze(y_pred)
|
||||||
train_loss = criterion(y_pred, y_train)
|
train_loss = criterion(y_pred, y_train)
|
@ -1,37 +0,0 @@
|
|||||||
// Declarative pipeline: checks out the training branch, copies the dataset
// artifacts and runs training_mlflow.py, archiving the resulting mlruns tree.
pipeline {
    agent {
        // Agent container built from the repository's Dockerfile, with the
        // host directory /mlruns mounted at /mlruns inside the container.
        dockerfile {
            filename 'Dockerfile'
            args '-v /mlruns:/mlruns'
        }
    }
    options {
        // Allow the named job to copy this job's artifacts.
        copyArtifactPermission('s444421-predict-s444501');
    }
    parameters {
        // Which dataset build to copy artifacts from (default: last successful).
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR'
        )
        // Passed to the training script as its first command-line argument.
        string(
            defaultValue: '1000',
            description: 'Number of epochs',
            name: 'EPOCHS'
        )
    }
    stages {
        stage('Check out from version control') {
            steps {
                checkout([$class: 'GitSCM', branches: [[name: '*/training_and_evaluation']], extensions: [], userRemoteConfigs: [[credentialsId: 's444421', url: 'https://git.wmi.amu.edu.pl/s444421/ium_444421.git']]])
            }
        }
        stage('Training') {
            steps {
                // Fetch the dataset files produced by the dataset job.
                copyArtifacts filter: '*', projectName:'s444421-create-dataset', selector: buildParameter('BUILD_SELECTOR')
                // Single quotes: $EPOCHS is expanded by the shell, not by Groovy.
                sh 'ipython ./training_mlflow.py $EPOCHS'
                archiveArtifacts artifacts: 'mlruns/**'
            }
        }
    }
}
|
|
@ -1,131 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# coding: utf-8
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
from sklearn.metrics import accuracy_score
|
|
||||||
import torch
|
|
||||||
from torch import nn, optim
|
|
||||||
import torch.nn.functional as F
|
|
||||||
import sys
|
|
||||||
import mlflow
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
# Send all runs to the tracking server and group them under one experiment.
mlflow.set_tracking_uri("http://172.17.0.1:5000")
mlflow.set_experiment("s444421")


# In[ ]:


# Number of training epochs: first command-line argument when given,
# otherwise 1000 so the script can also be started without arguments
# instead of failing with an IndexError.
epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 1000
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_data():
    """Load the training split from the working directory as float tensors.

    Returns:
        A pair ``(X_train, y_train)``: the contents of ``X_train.csv`` as a
        float tensor and the contents of ``y_train.csv`` as a float tensor
        with size-1 dimensions squeezed out.
    """
    features = pd.read_csv('X_train.csv')
    labels = pd.read_csv('y_train.csv')
    feature_tensor = torch.from_numpy(np.array(features)).float()
    label_tensor = torch.from_numpy(labels.values).float().squeeze()
    return feature_tensor, label_tensor
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
class Net(nn.Module):
    """Small fully connected binary classifier: n_features -> 5 -> 3 -> 1.

    The two hidden layers use ReLU and the single output unit is passed
    through a sigmoid, so the forward pass yields values in (0, 1).
    """

    def __init__(self, n_features):
        """Create the three linear layers for inputs of width *n_features*."""
        super().__init__()
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return the sigmoid-activated output of the network for batch *x*."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return torch.sigmoid(logits)
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of predictions that match *y_true*.

    *y_pred* holds scores that are thresholded at 0.5 (scores >= 0.5 count
    as the positive class) and flattened before being compared with
    *y_true*. The result is a scalar float tensor.
    """
    labels = y_pred.ge(.5).view(-1)
    hits = torch.eq(y_true, labels)
    return hits.sum().float() / len(y_true)
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
def round_tensor(t, decimal_places=3):
    """Return the scalar held by tensor *t* rounded to *decimal_places* digits."""
    scalar = t.item()
    return round(scalar, decimal_places)
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
def train_model(X_train, y_train, device, epochs):
    """Train a ``Net`` on the given data with full-batch Adam updates.

    Args:
        X_train: feature tensor; its second dimension sets the input width.
        y_train: target tensor passed to the binary cross-entropy loss.
        device: torch device the data, model and loss are moved to.
        epochs: number of optimization steps to run.

    Returns:
        ``(net, loss)``: the trained model and the rounded loss computed in
        the last epoch (before that epoch's optimizer step).
    """
    net = Net(X_train.shape[1])
    loss_fn = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    inputs, targets = X_train.to(device), y_train.to(device)
    net, loss_fn = net.to(device), loss_fn.to(device)

    for epoch in range(epochs):
        outputs = torch.squeeze(net(inputs))
        train_loss = loss_fn(outputs, targets)

        # Progress report every 100 epochs, starting with epoch 0.
        if not epoch % 100:
            train_acc = calculate_accuracy(targets, outputs)
            print(
f'''epoch {epoch}
Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
''')

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return net, round_tensor(train_loss)
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
def my_main(epochs):
    """Train the model, save it locally and log it to the active MLflow run.

    Logs the ``epochs`` parameter and the final ``loss`` metric, writes the
    trained module to ``model.pth`` and logs it as the ``my_model`` artifact
    with a signature and an input example taken from ``X_test.csv``.

    Args:
        epochs: number of training epochs.
    """
    X_train, y_train = prepare_data()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model, loss = train_model(X_train, y_train, device, epochs)
    torch.save(model, 'model.pth')
    mlflow.log_param("epochs", epochs)
    mlflow.log_metric("loss", loss)

    X_test = pd.read_csv('X_test.csv')
    X_test = torch.from_numpy(np.array(X_test)).float()
    X_test = X_test.to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        y_pred = model(X_test)

    # Tensor.numpy() requires CPU tensors, so move the data back first.
    X_test_np = X_test.cpu().numpy()
    # The signature describes what the logged module itself consumes and
    # returns, i.e. the raw sigmoid outputs rather than thresholded labels.
    signature = mlflow.models.signature.infer_signature(X_test_np, y_pred.cpu().numpy())
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

    log_kwargs = {"signature": signature, "input_example": X_test_np[:5]}
    # Model registration is skipped when tracking goes to a plain file store.
    if tracking_url_type_store != "file":
        log_kwargs["registered_model_name"] = "s444421"
    # The model is a torch.nn.Module, so it is logged with the PyTorch flavor;
    # the sklearn flavor expects an estimator that exposes predict().
    mlflow.pytorch.log_model(model, "my_model", **log_kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
# Run the whole training inside one MLflow run so that everything logged by
# my_main is attached to it.
with mlflow.start_run() as run:
    run_info = run.info
    print("MLflow run experiment_id: {0}".format(run_info.experiment_id))
    print("MLflow run artifact_uri: {0}".format(run_info.artifact_uri))
    my_main(epochs)
|
|
@ -1,113 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# coding: utf-8
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
from sklearn.metrics import accuracy_score
|
|
||||||
import torch
|
|
||||||
from torch import nn, optim
|
|
||||||
import torch.nn.functional as F
|
|
||||||
import sys
|
|
||||||
from sacred import Experiment
|
|
||||||
from sacred.observers import FileStorageObserver, MongoObserver
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
# Sacred experiment object; created with save_git_info=False.
ex = Experiment(save_git_info=False)
# Observers: one writing to the local 'my_runs' directory and one writing to
# the 'sacred' MongoDB database.
ex.observers.append(FileStorageObserver('my_runs'))
# NOTE(review): the MongoDB user name and password are hard-coded in the URL
# below - consider supplying them from the environment instead.
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))

# Experiment configuration: 'epochs' is injected into my_main by name.
@ex.config
def my_config():
    epochs = 400
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_data():
    """Load the training split from the working directory as float tensors.

    Returns:
        A pair ``(X_train, y_train)``: the contents of ``X_train.csv`` as a
        float tensor and the contents of ``y_train.csv`` as a float tensor
        with size-1 dimensions squeezed out.
    """
    features = pd.read_csv('X_train.csv')
    labels = pd.read_csv('y_train.csv')
    feature_tensor = torch.from_numpy(np.array(features)).float()
    label_tensor = torch.from_numpy(labels.values).float().squeeze()
    return feature_tensor, label_tensor
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
class Net(nn.Module):
    """Small fully connected binary classifier: n_features -> 5 -> 3 -> 1.

    The two hidden layers use ReLU and the single output unit is passed
    through a sigmoid, so the forward pass yields values in (0, 1).
    """

    def __init__(self, n_features):
        """Create the three linear layers for inputs of width *n_features*."""
        super().__init__()
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return the sigmoid-activated output of the network for batch *x*."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return torch.sigmoid(logits)
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of predictions that match *y_true*.

    *y_pred* holds scores that are thresholded at 0.5 (scores >= 0.5 count
    as the positive class) and flattened before being compared with
    *y_true*. The result is a scalar float tensor.
    """
    labels = y_pred.ge(.5).view(-1)
    hits = torch.eq(y_true, labels)
    return hits.sum().float() / len(y_true)
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
def round_tensor(t, decimal_places=3):
    """Return the scalar held by tensor *t* rounded to *decimal_places* digits."""
    scalar = t.item()
    return round(scalar, decimal_places)
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
def train_model(X_train, y_train, device, epochs):
    """Train a ``Net`` on the given data with full-batch Adam updates.

    Args:
        X_train: feature tensor; its second dimension sets the input width.
        y_train: target tensor passed to the binary cross-entropy loss.
        device: torch device the data, model and loss are moved to.
        epochs: number of optimization steps to run.

    Returns:
        ``(net, loss)``: the trained model and the rounded loss computed in
        the last epoch (before that epoch's optimizer step).
    """
    net = Net(X_train.shape[1])
    loss_fn = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    inputs, targets = X_train.to(device), y_train.to(device)
    net, loss_fn = net.to(device), loss_fn.to(device)

    for epoch in range(epochs):
        outputs = torch.squeeze(net(inputs))
        train_loss = loss_fn(outputs, targets)

        # Progress report every 100 epochs, starting with epoch 0.
        if not epoch % 100:
            train_acc = calculate_accuracy(targets, outputs)
            print(
f'''epoch {epoch}
Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
''')

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return net, round_tensor(train_loss)
|
|
||||||
|
|
||||||
|
|
||||||
# In[ ]:
|
|
||||||
|
|
||||||
|
|
||||||
@ex.automain
def my_main(epochs, _run):
    """Train the model, store it as a Sacred artifact and record run info.

    Args:
        epochs: number of training epochs.
        _run: run object; ``epochs`` and the final ``loss`` are stored in
            its ``info`` mapping.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    X_train, y_train = prepare_data()

    model, loss = train_model(X_train, y_train, device, epochs)

    # Save the whole module to disk, then attach that file to the run.
    torch.save(model, 'model.pth')
    ex.add_artifact('model.pth')

    _run.info["epochs"] = epochs
    _run.info["loss"] = loss
|
|
BIN
travel-insurance-prediction-data.zip
Normal file
BIN
travel-insurance-prediction-data.zip
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user