{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "## IUM_05"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "6198de641534b1bc"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Wymagane zależności"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "6a7ce1eb01ab7917"
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "\n",
    "import os\n",
    "\n",
    "from sklearn.metrics import classification_report"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.332889300Z",
     "start_time": "2024-04-04T06:37:55.254786700Z"
    }
   },
   "id": "2d998e3e2e5e8487"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Model"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "7112915408dc0168"
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [],
   "source": [
    "# Neural Network\n",
    "class NeuralNetwork(nn.Module):\n",
    "    \"\"\"\n",
    "    Three-layer fully connected binary classifier.\n",
    "\n",
    "    The input is projected to ``hidden_size`` units, then to\n",
    "    ``hidden_size // 2`` units, then to a single output. ReLU follows the\n",
    "    first two linear layers and a sigmoid follows the last one, so\n",
    "    ``forward`` returns values between 0 and 1.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_size, hidden_size):\n",
    "        \"\"\"\n",
    "        Create the layers.\n",
    "\n",
    "        :param input_size: number of features in each input row\n",
    "        :param hidden_size: width of the first hidden layer; the second\n",
    "            hidden layer has ``hidden_size // 2`` units\n",
    "        \"\"\"\n",
    "        super().__init__()\n",
    "        half_hidden = hidden_size // 2\n",
    "\n",
    "        # Linear layers, created in fc1 -> fc2 -> fc3 order\n",
    "        self.fc1 = nn.Linear(input_size, hidden_size)\n",
    "        self.fc2 = nn.Linear(hidden_size, half_hidden)\n",
    "        self.fc3 = nn.Linear(half_hidden, 1)\n",
    "\n",
    "        # Parameter-free activations\n",
    "        self.relu = nn.ReLU()\n",
    "        self.sigmoid = nn.Sigmoid()\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"\n",
    "        Run the forward pass.\n",
    "\n",
    "        :param x: tensor whose last dimension has ``input_size`` entries\n",
    "        :return: tensor whose last dimension has size 1, holding the\n",
    "            sigmoid outputs\n",
    "        \"\"\"\n",
    "        hidden = self.relu(self.fc1(x))\n",
    "        hidden = self.relu(self.fc2(hidden))\n",
    "        return self.sigmoid(self.fc3(hidden))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.418810600Z",
     "start_time": "2024-04-04T06:37:55.272635200Z"
    }
   },
   "id": "559708985898a938"
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "outputs": [
    {
     "data": {
      "text/plain": ""
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seed PyTorch's random number generator for reproducibility\n",
    "torch.manual_seed(1234)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.452156200Z",
     "start_time": "2024-04-04T06:37:55.284602100Z"
    }
   },
   "id": "aa19c822de265874"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Wczytywanie danych"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "47ebb4d371c1aa3f"
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "outputs": [],
   "source": [
    "# Load the train/test splits from CSV\n",
    "train = pd.read_csv('datasets/train.csv')\n",
    "test = pd.read_csv('datasets/test.csv')\n",
    "\n",
    "\n",
    "def _features_and_target(frame):\n",
    "    \"\"\"\n",
    "    Split a data frame into a float feature tensor and a float target tensor.\n",
    "\n",
    "    :param frame: data frame with 'id' and 'diagnosis' columns; every other\n",
    "        column is used as a feature\n",
    "    :return: (features, target) tensors, target reshaped to (n_rows, 1)\n",
    "    \"\"\"\n",
    "    features = torch.FloatTensor(frame.drop(columns=['id', 'diagnosis']).values)\n",
    "    target = torch.FloatTensor(frame['diagnosis'].values).view(-1, 1)\n",
    "    return features, target\n",
    "\n",
    "\n",
    "# Features / targets as PyTorch tensors\n",
    "X_train, y_train = _features_and_target(train)\n",
    "X_test, y_test = _features_and_target(test)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.523028300Z",
     "start_time": "2024-04-04T06:37:55.302118800Z"
    }
   },
   "id": "3b26282bd5803093"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Parametry modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "5776bfd5b5830e40"
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "outputs": [],
   "source": [
    "# Hyperparameters\n",
    "input_size = X_train.shape[1]  # one input unit per feature column\n",
    "hidden_size = 128  # width of the first hidden layer\n",
    "learning_rate = 0.001  # passed to the optimizer as lr\n",
    "weight_decay = 0.001  # passed to the optimizer as weight_decay\n",
    "num_epochs = 1000  # number of training-loop iterations"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.525074500Z",
     "start_time": "2024-04-04T06:37:55.330649100Z"
    }
   },
   "id": "997f473b9f1904af"
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "outputs": [],
   "source": [
    "# Build the network (weights are drawn from the RNG seeded above)\n",
    "model = NeuralNetwork(input_size=input_size, hidden_size=hidden_size)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.526081600Z",
     "start_time": "2024-04-04T06:37:55.345104500Z"
    }
},
   "id": "813bf1fe055f7d6e"
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [],
   "source": [
    "# Binary cross-entropy loss, optimised with Adam\n",
    "criterion = nn.BCELoss()\n",
    "optimizer = optim.Adam(\n",
    "    model.parameters(),\n",
    "    lr=learning_rate,\n",
    "    weight_decay=weight_decay,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:55.526081600Z",
     "start_time": "2024-04-04T06:37:55.360079200Z"
    }
   },
   "id": "2a7ed7691c54d12b"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Trenowanie modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "c3100fb3ca7a4978"
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [100/1000], Loss: 0.08850393444299698\n",
      "Epoch [200/1000], Loss: 0.039436809718608856\n",
      "Epoch [300/1000], Loss: 0.031037550419569016\n",
      "Epoch [400/1000], Loss: 0.026670493185520172\n",
      "Epoch [500/1000], Loss: 0.023590415716171265\n",
      "Epoch [600/1000], Loss: 0.02146584913134575\n",
      "Epoch [700/1000], Loss: 0.019706938415765762\n",
      "Epoch [800/1000], Loss: 0.018304765224456787\n",
      "Epoch [900/1000], Loss: 0.017177913337945938\n",
      "Epoch [1000/1000], Loss: 0.016160517930984497\n"
     ]
    }
   ],
   "source": [
    "# Training loop: every epoch is one gradient step on the full (X_train, y_train) set\n",
    "for epoch in range(num_epochs):\n",
    "    # Clear the gradients left over from the previous step\n",
    "    optimizer.zero_grad()\n",
    "\n",
    "    # Forward pass and loss\n",
    "    outputs = model(X_train)\n",
    "    loss = criterion(outputs, y_train)\n",
    "\n",
    "    # Backward pass and weight update\n",
    "    loss.backward()\n",
    "    optimizer.step()\n",
    "\n",
    "    # Report the loss every 100th epoch\n",
    "    if (epoch + 1) % 100 == 0:\n",
    "        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:58.081587800Z",
     "start_time": "2024-04-04T06:37:55.376710100Z"
    }
   },
   "id": "ab1005e4ca0e57ae"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Ewaluacja modelu"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "90ae4e97a49d8140"
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " precision recall f1-score support\n",
      "\n",
      " B 0.97 1.00 0.99 34\n",
      " M 1.00 0.96 0.98 23\n",
      "\n",
      " accuracy 0.98 57\n",
      " macro avg 0.99 0.98 0.98 57\n",
      "weighted avg 0.98 0.98 0.98 57\n"
     ]
    }
   ],
   "source": [
    "# Test the model\n",
    "with torch.no_grad():  # inference only, so no autograd graph is recorded\n",
    "    probabilities = model(X_test)\n",
    "\n",
    "# Threshold the model outputs at 0.5 to get hard 0/1 labels. The tensors are\n",
    "# converted to NumPy explicitly instead of relying on implicit coercion.\n",
    "y_pred = np.where(probabilities.numpy() > 0.5, 1, 0)\n",
    "print(classification_report(y_test.numpy(), y_pred, target_names=['B', 'M']))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:37:58.167163100Z",
     "start_time": "2024-04-04T06:37:58.080578600Z"
    }
   },
   "id": "1e8bb7d1a0f4d572"
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Zapisywanie modelu do pliku"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "2491b52c3a6f1d39"
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "outputs": [],
   "source": [
    "# Create the output directory if it is missing. exist_ok=True replaces the\n",
    "# separate exists() check, which could race with another process creating\n",
    "# the directory between the check and the makedirs call.\n",
    "os.makedirs('./models', exist_ok=True)\n",
    "\n",
    "# Save the whole model object (the pickled module, not only its state_dict).\n",
    "# NOTE(review): loading this file needs the NeuralNetwork class to be\n",
    "# importable on the loading side; saving model.state_dict() would avoid that,\n",
    "# but confirm with whoever consumes model.pth before changing the format.\n",
    "torch.save(model, './models/model.pth')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-04T06:49:31.738772700Z",
     "start_time": "2024-04-04T06:49:31.718838500Z"
    }
   },
   "id": "b7f509e40380b9c5"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}