ium_464863/IUM_05.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "## IUM_05"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "6198de641534b1bc"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Wymagane zależności"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "6a7ce1eb01ab7917"
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "\n",
    "import os\n",
    "\n",
    "from sklearn.metrics import classification_report"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.332889300Z",
     "start_time": "2024-04-04T06:37:55.254786700Z"
    }
   },
   "id": "2d998e3e2e5e8487"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Model"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "7112915408dc0168"
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [],
   "source": [
    "# Model definition\n",
    "class NeuralNetwork(nn.Module):\n",
    "    \"\"\"\n",
    "    Feed-forward binary classifier with two hidden layers.\n",
    "\n",
    "    Layer widths are input_size -> hidden_size -> hidden_size // 2 -> 1.\n",
    "    A ReLU follows each hidden layer and a sigmoid is applied to the\n",
    "    single output unit, so every output lies between 0 and 1.\n",
    "\n",
    "    Args:\n",
    "        input_size: Number of features in each input row.\n",
    "        hidden_size: Width of the first hidden layer; the second hidden\n",
    "            layer uses hidden_size // 2 units.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_size, hidden_size):\n",
    "        super().__init__()\n",
    "\n",
    "        half_hidden = hidden_size // 2\n",
    "\n",
    "        # Fully connected layers, created in the order they are applied\n",
    "        self.fc1 = nn.Linear(input_size, hidden_size)\n",
    "        self.fc2 = nn.Linear(hidden_size, half_hidden)\n",
    "        self.fc3 = nn.Linear(half_hidden, 1)\n",
    "\n",
    "        # Activation modules reused by forward()\n",
    "        self.relu = nn.ReLU()\n",
    "        self.sigmoid = nn.Sigmoid()\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"\n",
    "        Map a batch of feature rows to one sigmoid output per row.\n",
    "        \"\"\"\n",
    "        hidden = self.relu(self.fc1(x))\n",
    "        hidden = self.relu(self.fc2(hidden))\n",
    "        return self.sigmoid(self.fc3(hidden))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.418810600Z",
     "start_time": "2024-04-04T06:37:55.272635200Z"
    }
   },
   "id": "559708985898a938"
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "outputs": [
    {
     "data": {
      "text/plain": "<torch._C.Generator at 0x226dc7744f0>"
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fix PyTorch's global RNG so the weight initialisation below is repeatable\n",
    "SEED = 1234\n",
    "torch.manual_seed(SEED)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.452156200Z",
     "start_time": "2024-04-04T06:37:55.284602100Z"
    }
   },
   "id": "aa19c822de265874"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Wczytywanie danych"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "47ebb4d371c1aa3f"
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "outputs": [],
   "source": [
    "def split_features_target(frame):\n",
    "    \"\"\"\n",
    "    Turn one dataset split into float32 PyTorch tensors.\n",
    "\n",
    "    Args:\n",
    "        frame: DataFrame containing 'id', 'diagnosis' and the feature columns.\n",
    "\n",
    "    Returns:\n",
    "        (X, y): X holds every column except 'id' and 'diagnosis';\n",
    "        y holds 'diagnosis' reshaped to a column vector of shape (n, 1).\n",
    "    \"\"\"\n",
    "    features = frame.drop(columns=['id', 'diagnosis']).to_numpy()\n",
    "    # NOTE: 'diagnosis' must already be numeric; string labels would fail the conversion below\n",
    "    target = frame['diagnosis'].to_numpy()\n",
    "\n",
    "    # torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor constructor\n",
    "    X = torch.tensor(features, dtype=torch.float32)\n",
    "    y = torch.tensor(target, dtype=torch.float32).view(-1, 1)\n",
    "    return X, y\n",
    "\n",
    "\n",
    "# Load data\n",
    "train = pd.read_csv('datasets/train.csv')\n",
    "test = pd.read_csv('datasets/test.csv')\n",
    "\n",
    "# Split each frame into features / target and convert to tensors\n",
    "X_train, y_train = split_features_target(train)\n",
    "X_test, y_test = split_features_target(test)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.523028300Z",
     "start_time": "2024-04-04T06:37:55.302118800Z"
    }
   },
   "id": "3b26282bd5803093"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Parametry modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "5776bfd5b5830e40"
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "outputs": [],
   "source": [
    "# Hyperparameters\n",
    "input_size = X_train.size(1)  # one input unit per feature column\n",
    "hidden_size = 128             # first hidden layer width; the second uses hidden_size // 2\n",
    "learning_rate = 1e-3          # step size handed to Adam\n",
    "weight_decay = 1e-3           # handed to Adam as its weight_decay argument\n",
    "num_epochs = 1_000            # number of iterations of the training loop"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.525074500Z",
     "start_time": "2024-04-04T06:37:55.330649100Z"
    }
   },
   "id": "997f473b9f1904af"
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "outputs": [],
   "source": [
    "# Build the network from the hyperparameters defined above\n",
    "model = NeuralNetwork(input_size=input_size, hidden_size=hidden_size)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.526081600Z",
     "start_time": "2024-04-04T06:37:55.345104500Z"
    }
   },
   "id": "813bf1fe055f7d6e"
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [],
   "source": [
    "# Binary cross-entropy on probabilities; pairs with the sigmoid at the end of NeuralNetwork.forward\n",
    "criterion = nn.BCELoss()\n",
    "\n",
    "# Adam over every model parameter, using the learning rate and weight decay set above\n",
    "optimizer = optim.Adam(\n",
    "    model.parameters(),\n",
    "    lr=learning_rate,\n",
    "    weight_decay=weight_decay,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.526081600Z",
     "start_time": "2024-04-04T06:37:55.360079200Z"
    }
   },
   "id": "2a7ed7691c54d12b"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Trenowanie modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "c3100fb3ca7a4978"
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [100/1000], Loss: 0.08850393444299698\n",
      "Epoch [200/1000], Loss: 0.039436809718608856\n",
      "Epoch [300/1000], Loss: 0.031037550419569016\n",
      "Epoch [400/1000], Loss: 0.026670493185520172\n",
      "Epoch [500/1000], Loss: 0.023590415716171265\n",
      "Epoch [600/1000], Loss: 0.02146584913134575\n",
      "Epoch [700/1000], Loss: 0.019706938415765762\n",
      "Epoch [800/1000], Loss: 0.018304765224456787\n",
      "Epoch [900/1000], Loss: 0.017177913337945938\n",
      "Epoch [1000/1000], Loss: 0.016160517930984497\n"
     ]
    }
   ],
   "source": [
    "# Full-batch training: every epoch is one optimisation step over all of X_train\n",
    "for epoch in range(num_epochs):\n",
    "    # Forward pass and loss on the whole training set\n",
    "    outputs = model(X_train)\n",
    "    loss = criterion(outputs, y_train)\n",
    "\n",
    "    # Clear gradients left from the previous step, backpropagate, then update the weights\n",
    "    optimizer.zero_grad()\n",
    "    loss.backward()\n",
    "    optimizer.step()\n",
    "\n",
    "    # Report the loss on every 100th epoch\n",
    "    if not (epoch + 1) % 100:\n",
    "        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:58.081587800Z",
     "start_time": "2024-04-04T06:37:55.376710100Z"
    }
   },
   "id": "ab1005e4ca0e57ae"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Ewaluacja modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "90ae4e97a49d8140"
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           B       0.97      1.00      0.99        34\n",
      "           M       1.00      0.96      0.98        23\n",
      "\n",
      "    accuracy                           0.98        57\n",
      "   macro avg       0.99      0.98      0.98        57\n",
      "weighted avg       0.98      0.98      0.98        57\n"
     ]
    }
   ],
   "source": [
    "# Evaluate on the test split\n",
    "# Switch to inference mode; this model has no dropout/batch-norm layers, so it is a\n",
    "# safeguard for future architecture changes. The model stays in eval mode afterwards.\n",
    "model.eval()\n",
    "\n",
    "# No gradients are needed for prediction\n",
    "with torch.no_grad():\n",
    "    y_prob = model(X_test)\n",
    "\n",
    "# Threshold the sigmoid outputs at 0.5 and convert explicitly to NumPy for scikit-learn\n",
    "# instead of relying on implicit tensor-to-array conversion\n",
    "y_pred = (y_prob > 0.5).long().numpy()\n",
    "y_true = y_test.numpy()\n",
    "\n",
    "# target_names are matched to the sorted labels, so label 0 is shown as 'B' and label 1 as 'M'\n",
    "print(classification_report(y_true, y_pred, target_names=['B', 'M']))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:58.167163100Z",
     "start_time": "2024-04-04T06:37:58.080578600Z"
    }
   },
   "id": "1e8bb7d1a0f4d572"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Zapisywanie modelu do pliku"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "2491b52c3a6f1d39"
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "outputs": [],
   "source": [
    "# Create the output directory if it is missing; exist_ok avoids the check-then-create race\n",
    "os.makedirs('./models', exist_ok=True)\n",
    "\n",
    "# Save the whole module object (architecture + weights) with pickle.\n",
    "# NOTE: loading this file needs the NeuralNetwork class to be importable under the same\n",
    "# name, and unpickling runs arbitrary code, so only load model.pth from a trusted source.\n",
    "# Saving model.state_dict() is the more portable option, but any code that loads\n",
    "# model.pth would have to be changed together with this cell.\n",
    "torch.save(model, './models/model.pth')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:49:31.738772700Z",
     "start_time": "2024-04-04T06:49:31.718838500Z"
    }
   },
   "id": "b7f509e40380b9c5"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
IUM_05 - add train and prediction scripts, update Docker env, update Jenkinsfile 2024-04-04 09:06:39 +02:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "markdown",`
			`"source": [`
			`"## IUM_05"`
			`],`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"id": "6198de641534b1bc"`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"source": [`
			`"#### Wymagane zależności"`
			`],`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"id": "6a7ce1eb01ab7917"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 23,`
			`"outputs": [],`
			`"source": [`
			`"import numpy as np\n",`
			`"import pandas as pd\n",`
			`"import torch\n",`
			`"import torch.nn as nn\n",`
			`"import torch.optim as optim\n",`
			`"\n",`
			`"import os\n",`
			`"\n",`
			`"from sklearn.metrics import classification_report"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:55.332889300Z",`
			`"start_time": "2024-04-04T06:37:55.254786700Z"`
			`}`
			`},`
			`"id": "2d998e3e2e5e8487"`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"source": [`
			`"#### Model"`
			`],`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"id": "7112915408dc0168"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 24,`
			`"outputs": [],`
			`"source": [`
			`"# Neural Network\n",`
			`"class NeuralNetwork(nn.Module):\n",`
			`" \"\"\"\n",`
			`" Neural network model for classification problem.\n",`
			`" \"\"\"\n",`
			`"\n",`
			`" def __init__(self, input_size, hidden_size):\n",`
			`" super(NeuralNetwork, self).__init__()\n",`
			`"\n",`
			`" self.fc1 = nn.Linear(input_size, hidden_size)\n",`
			`" self.fc2 = nn.Linear(hidden_size, hidden_size // 2)\n",`
			`" self.fc3 = nn.Linear(hidden_size // 2, 1)\n",`
			`"\n",`
			`" self.relu = nn.ReLU()\n",`
			`" self.sigmoid = nn.Sigmoid()\n",`
			`"\n",`
			`" def forward(self, x):\n",`
			`" out = self.fc1(x)\n",`
			`" out = self.relu(out)\n",`
			`" out = self.fc2(out)\n",`
			`" out = self.relu(out)\n",`
			`" out = self.fc3(out)\n",`
			`" out = self.sigmoid(out)\n",`
			`" return out"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:55.418810600Z",`
			`"start_time": "2024-04-04T06:37:55.272635200Z"`
			`}`
			`},`
			`"id": "559708985898a938"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 25,`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": "<torch._C.Generator at 0x226dc7744f0>"`
			`},`
			`"execution_count": 25,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"# Seed for reproducibility\n",`
			`"torch.manual_seed(1234)"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:55.452156200Z",`
			`"start_time": "2024-04-04T06:37:55.284602100Z"`
			`}`
			`},`
			`"id": "aa19c822de265874"`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"source": [`
			`"#### Wczytywanie danych"`
			`],`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"id": "47ebb4d371c1aa3f"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 26,`
			`"outputs": [],`
			`"source": [`
			`"# Load data\n",`
			`"train = pd.read_csv('datasets/train.csv')\n",`
			`"test = pd.read_csv('datasets/test.csv')\n",`
			`"\n",`
			`"# Split data\n",`
			`"X_train = train.drop(columns=['id', 'diagnosis']).values\n",`
			`"y_train = train['diagnosis'].values\n",`
			`"\n",`
			`"X_test = test.drop(columns=['id', 'diagnosis']).values\n",`
			`"y_test = test['diagnosis'].values\n",`
			`"\n",`
			`"# Convert data to PyTorch tensors\n",`
			`"X_train = torch.FloatTensor(X_train)\n",`
			`"y_train = torch.FloatTensor(y_train).view(-1, 1)\n",`
			`"\n",`
			`"X_test = torch.FloatTensor(X_test)\n",`
			`"y_test = torch.FloatTensor(y_test).view(-1, 1)"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:55.523028300Z",`
			`"start_time": "2024-04-04T06:37:55.302118800Z"`
			`}`
			`},`
			`"id": "3b26282bd5803093"`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"source": [`
			`"#### Parametry modelu"`
			`],`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"id": "5776bfd5b5830e40"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 27,`
			`"outputs": [],`
			`"source": [`
			`"# Parameters\n",`
			`"input_size = X_train.shape[1]\n",`
			`"hidden_size = 128\n",`
			`"learning_rate = 0.001\n",`
			`"weight_decay = 0.001\n",`
			`"num_epochs = 1000"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:55.525074500Z",`
			`"start_time": "2024-04-04T06:37:55.330649100Z"`
			`}`
			`},`
			`"id": "997f473b9f1904af"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 28,`
			`"outputs": [],`
			`"source": [`
			`"# Model initialization\n",`
			`"model = NeuralNetwork(input_size, hidden_size)"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:55.526081600Z",`
			`"start_time": "2024-04-04T06:37:55.345104500Z"`
			`}`
			`},`
			`"id": "813bf1fe055f7d6e"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 29,`
			`"outputs": [],`
			`"source": [`
			`"# Loss function and optimizer\n",`
			`"criterion = nn.BCELoss()\n",`
			`"optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:55.526081600Z",`
			`"start_time": "2024-04-04T06:37:55.360079200Z"`
			`}`
			`},`
			`"id": "2a7ed7691c54d12b"`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"source": [`
			`"#### Trenowanie modelu"`
			`],`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"id": "c3100fb3ca7a4978"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 30,`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Epoch [100/1000], Loss: 0.08850393444299698\n",`
			`"Epoch [200/1000], Loss: 0.039436809718608856\n",`
			`"Epoch [300/1000], Loss: 0.031037550419569016\n",`
			`"Epoch [400/1000], Loss: 0.026670493185520172\n",`
			`"Epoch [500/1000], Loss: 0.023590415716171265\n",`
			`"Epoch [600/1000], Loss: 0.02146584913134575\n",`
			`"Epoch [700/1000], Loss: 0.019706938415765762\n",`
			`"Epoch [800/1000], Loss: 0.018304765224456787\n",`
			`"Epoch [900/1000], Loss: 0.017177913337945938\n",`
			`"Epoch [1000/1000], Loss: 0.016160517930984497\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"# Training loop\n",`
			`"for epoch in range(num_epochs):\n",`
			`" # Zero the gradients\n",`
			`" optimizer.zero_grad()\n",`
			`"\n",`
			`" # Forward pass\n",`
			`" outputs = model(X_train)\n",`
			`"\n",`
			`" # Compute loss\n",`
			`" loss = criterion(outputs, y_train)\n",`
			`"\n",`
			`" # Backward pass\n",`
			`" loss.backward()\n",`
			`"\n",`
			`" # Update weights\n",`
			`" optimizer.step()\n",`
			`"\n",`
			`" # Print loss\n",`
			`" if (epoch + 1) % 100 == 0:\n",`
			`" print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:58.081587800Z",`
			`"start_time": "2024-04-04T06:37:55.376710100Z"`
			`}`
			`},`
			`"id": "ab1005e4ca0e57ae"`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"source": [`
			`"#### Ewaluacja modelu"`
			`],`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"id": "90ae4e97a49d8140"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 31,`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`" precision recall f1-score support\n",`
			`"\n",`
			`" B 0.97 1.00 0.99 34\n",`
			`" M 1.00 0.96 0.98 23\n",`
			`"\n",`
			`" accuracy 0.98 57\n",`
			`" macro avg 0.99 0.98 0.98 57\n",`
			`"weighted avg 0.98 0.98 0.98 57\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"# Test the model\n",`
			`"with torch.no_grad():\n",`
			`" y_pred = model(X_test)\n",`
			`" y_pred = np.where(y_pred > 0.5, 1, 0)\n",`
			`" print(classification_report(y_test, y_pred, target_names=['B', 'M'])) "`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:37:58.167163100Z",`
			`"start_time": "2024-04-04T06:37:58.080578600Z"`
			`}`
			`},`
			`"id": "1e8bb7d1a0f4d572"`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"source": [`
			`"#### Zapisywanie modelu do pliku"`
			`],`
			`"metadata": {`
			`"collapsed": false`
			`},`
			`"id": "2491b52c3a6f1d39"`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 34,`
			`"outputs": [],`
			`"source": [`
			`"# If directory models does not exist, create it\n",`
			`"if not os.path.exists('./models'):\n",`
			`" os.makedirs('./models')\n",`
			`"\n",`
			`"# Save the model\n",`
			`"torch.save(model, './models/model.pth')"`
			`],`
			`"metadata": {`
			`"collapsed": false,`
			`"ExecuteTime": {`
			`"end_time": "2024-04-04T06:49:31.738772700Z",`
			`"start_time": "2024-04-04T06:49:31.718838500Z"`
			`}`
			`},`
			`"id": "b7f509e40380b9c5"`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 2`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython2",`
			`"version": "2.7.6"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 5`
			`}`