{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "## IUM_05"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "6198de641534b1bc"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Wymagane zależności"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "6a7ce1eb01ab7917"
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "\n",
    "import os\n",
    "\n",
    "from sklearn.metrics import classification_report"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.332889300Z",
     "start_time": "2024-04-04T06:37:55.254786700Z"
    }
   },
   "id": "2d998e3e2e5e8487"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Model"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "7112915408dc0168"
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [],
   "source": [
    "# Neural Network\n",
    "class NeuralNetwork(nn.Module):\n",
    "    \"\"\"\n",
    "    Neural network model for classification problem.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_size, hidden_size):\n",
    "        super(NeuralNetwork, self).__init__()\n",
    "\n",
    "        self.fc1 = nn.Linear(input_size, hidden_size)\n",
    "        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)\n",
    "        self.fc3 = nn.Linear(hidden_size // 2, 1)\n",
    "\n",
    "        self.relu = nn.ReLU()\n",
    "        self.sigmoid = nn.Sigmoid()\n",
    "\n",
    "    def forward(self, x):\n",
    "        out = self.fc1(x)\n",
    "        out = self.relu(out)\n",
    "        out = self.fc2(out)\n",
    "        out = self.relu(out)\n",
    "        out = self.fc3(out)\n",
    "        out = self.sigmoid(out)\n",
    "        return out"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.418810600Z",
     "start_time": "2024-04-04T06:37:55.272635200Z"
    }
   },
   "id": "559708985898a938"
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "outputs": [
    {
     "data": {
      "text/plain": "<torch._C.Generator at 0x226dc7744f0>"
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seed for reproducibility\n",
    "torch.manual_seed(1234)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.452156200Z",
     "start_time": "2024-04-04T06:37:55.284602100Z"
    }
   },
   "id": "aa19c822de265874"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Wczytywanie danych"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "47ebb4d371c1aa3f"
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "outputs": [],
   "source": [
    "# Load data\n",
    "train = pd.read_csv('datasets/train.csv')\n",
    "test = pd.read_csv('datasets/test.csv')\n",
    "\n",
    "# Split data\n",
    "X_train = train.drop(columns=['id', 'diagnosis']).values\n",
    "y_train = train['diagnosis'].values\n",
    "\n",
    "X_test = test.drop(columns=['id', 'diagnosis']).values\n",
    "y_test = test['diagnosis'].values\n",
    "\n",
    "# Convert data to PyTorch tensors\n",
    "X_train = torch.FloatTensor(X_train)\n",
    "y_train = torch.FloatTensor(y_train).view(-1, 1)\n",
    "\n",
    "X_test = torch.FloatTensor(X_test)\n",
    "y_test = torch.FloatTensor(y_test).view(-1, 1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.523028300Z",
     "start_time": "2024-04-04T06:37:55.302118800Z"
    }
   },
   "id": "3b26282bd5803093"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Parametry modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "5776bfd5b5830e40"
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "outputs": [],
   "source": [
    "# Parameters\n",
    "input_size = X_train.shape[1]\n",
    "hidden_size = 128\n",
    "learning_rate = 0.001\n",
    "weight_decay = 0.001\n",
    "num_epochs = 1000"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.525074500Z",
     "start_time": "2024-04-04T06:37:55.330649100Z"
    }
   },
   "id": "997f473b9f1904af"
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "outputs": [],
   "source": [
    "# Model initialization\n",
    "model = NeuralNetwork(input_size, hidden_size)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.526081600Z",
     "start_time": "2024-04-04T06:37:55.345104500Z"
    }
   },
   "id": "813bf1fe055f7d6e"
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [],
   "source": [
    "# Loss function and optimizer\n",
    "criterion = nn.BCELoss()\n",
    "optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.526081600Z",
     "start_time": "2024-04-04T06:37:55.360079200Z"
    }
   },
   "id": "2a7ed7691c54d12b"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Trenowanie modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "c3100fb3ca7a4978"
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [100/1000], Loss: 0.08850393444299698\n",
      "Epoch [200/1000], Loss: 0.039436809718608856\n",
      "Epoch [300/1000], Loss: 0.031037550419569016\n",
      "Epoch [400/1000], Loss: 0.026670493185520172\n",
      "Epoch [500/1000], Loss: 0.023590415716171265\n",
      "Epoch [600/1000], Loss: 0.02146584913134575\n",
      "Epoch [700/1000], Loss: 0.019706938415765762\n",
      "Epoch [800/1000], Loss: 0.018304765224456787\n",
      "Epoch [900/1000], Loss: 0.017177913337945938\n",
      "Epoch [1000/1000], Loss: 0.016160517930984497\n"
     ]
    }
   ],
   "source": [
    "# Training loop\n",
    "for epoch in range(num_epochs):\n",
    "    # Zero the gradients\n",
    "    optimizer.zero_grad()\n",
    "\n",
    "    # Forward pass\n",
    "    outputs = model(X_train)\n",
    "\n",
    "    # Compute loss\n",
    "    loss = criterion(outputs, y_train)\n",
    "\n",
    "    # Backward pass\n",
    "    loss.backward()\n",
    "\n",
    "    # Update weights\n",
    "    optimizer.step()\n",
    "\n",
    "    # Print loss\n",
    "    if (epoch + 1) % 100 == 0:\n",
    "        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:58.081587800Z",
     "start_time": "2024-04-04T06:37:55.376710100Z"
    }
   },
   "id": "ab1005e4ca0e57ae"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Ewaluacja modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "90ae4e97a49d8140"
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           B       0.97      1.00      0.99        34\n",
      "           M       1.00      0.96      0.98        23\n",
      "\n",
      "    accuracy                           0.98        57\n",
      "   macro avg       0.99      0.98      0.98        57\n",
      "weighted avg       0.98      0.98      0.98        57\n"
     ]
    }
   ],
   "source": [
    "# Test the model\n",
    "with torch.no_grad():\n",
    "    y_pred = model(X_test)\n",
    "    y_pred = np.where(y_pred > 0.5, 1, 0)\n",
    "    print(classification_report(y_test, y_pred, target_names=['B', 'M']))    "
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:58.167163100Z",
     "start_time": "2024-04-04T06:37:58.080578600Z"
    }
   },
   "id": "1e8bb7d1a0f4d572"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Zapisywanie modelu do pliku"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "2491b52c3a6f1d39"
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "outputs": [],
   "source": [
    "# If directory models does not exist, create it\n",
    "if not os.path.exists('./models'):\n",
    "    os.makedirs('./models')\n",
    "\n",
    "# Save the model\n",
    "torch.save(model, './models/model.pth')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:49:31.738772700Z",
     "start_time": "2024-04-04T06:49:31.718838500Z"
    }
   },
   "id": "b7f509e40380b9c5"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}