ium_464863/IUM_05.ipynb

391 lines
9.2 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"source": [
"## IUM_05"
],
"metadata": {
"collapsed": false
},
"id": "6198de641534b1bc"
},
{
"cell_type": "markdown",
"source": [
"#### Wymagane zależności"
],
"metadata": {
"collapsed": false
},
"id": "6a7ce1eb01ab7917"
},
{
"cell_type": "code",
"execution_count": 23,
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"\n",
"import os\n",
"\n",
"from sklearn.metrics import classification_report"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:55.332889300Z",
"start_time": "2024-04-04T06:37:55.254786700Z"
}
},
"id": "2d998e3e2e5e8487"
},
{
"cell_type": "markdown",
"source": [
"#### Model"
],
"metadata": {
"collapsed": false
},
"id": "7112915408dc0168"
},
{
"cell_type": "code",
"execution_count": 24,
"outputs": [],
"source": [
"# Neural Network\n",
"class NeuralNetwork(nn.Module):\n",
" \"\"\"\n",
" Neural network model for classification problem.\n",
" \"\"\"\n",
"\n",
" def __init__(self, input_size, hidden_size):\n",
" super(NeuralNetwork, self).__init__()\n",
"\n",
" self.fc1 = nn.Linear(input_size, hidden_size)\n",
" self.fc2 = nn.Linear(hidden_size, hidden_size // 2)\n",
" self.fc3 = nn.Linear(hidden_size // 2, 1)\n",
"\n",
" self.relu = nn.ReLU()\n",
" self.sigmoid = nn.Sigmoid()\n",
"\n",
" def forward(self, x):\n",
" out = self.fc1(x)\n",
" out = self.relu(out)\n",
" out = self.fc2(out)\n",
" out = self.relu(out)\n",
" out = self.fc3(out)\n",
" out = self.sigmoid(out)\n",
" return out"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:55.418810600Z",
"start_time": "2024-04-04T06:37:55.272635200Z"
}
},
"id": "559708985898a938"
},
{
"cell_type": "code",
"execution_count": 25,
"outputs": [
{
"data": {
"text/plain": "<torch._C.Generator at 0x226dc7744f0>"
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Seed for reproducibility\n",
"torch.manual_seed(1234)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:55.452156200Z",
"start_time": "2024-04-04T06:37:55.284602100Z"
}
},
"id": "aa19c822de265874"
},
{
"cell_type": "markdown",
"source": [
"#### Wczytywanie danych"
],
"metadata": {
"collapsed": false
},
"id": "47ebb4d371c1aa3f"
},
{
"cell_type": "code",
"execution_count": 26,
"outputs": [],
"source": [
"# Load data\n",
"train = pd.read_csv('datasets/train.csv')\n",
"test = pd.read_csv('datasets/test.csv')\n",
"\n",
"# Split data\n",
"X_train = train.drop(columns=['id', 'diagnosis']).values\n",
"y_train = train['diagnosis'].values\n",
"\n",
"X_test = test.drop(columns=['id', 'diagnosis']).values\n",
"y_test = test['diagnosis'].values\n",
"\n",
"# Convert data to PyTorch tensors\n",
"X_train = torch.FloatTensor(X_train)\n",
"y_train = torch.FloatTensor(y_train).view(-1, 1)\n",
"\n",
"X_test = torch.FloatTensor(X_test)\n",
"y_test = torch.FloatTensor(y_test).view(-1, 1)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:55.523028300Z",
"start_time": "2024-04-04T06:37:55.302118800Z"
}
},
"id": "3b26282bd5803093"
},
{
"cell_type": "markdown",
"source": [
"#### Parametry modelu"
],
"metadata": {
"collapsed": false
},
"id": "5776bfd5b5830e40"
},
{
"cell_type": "code",
"execution_count": 27,
"outputs": [],
"source": [
"# Parameters\n",
"input_size = X_train.shape[1]\n",
"hidden_size = 128\n",
"learning_rate = 0.001\n",
"weight_decay = 0.001\n",
"num_epochs = 1000"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:55.525074500Z",
"start_time": "2024-04-04T06:37:55.330649100Z"
}
},
"id": "997f473b9f1904af"
},
{
"cell_type": "code",
"execution_count": 28,
"outputs": [],
"source": [
"# Model initialization\n",
"model = NeuralNetwork(input_size, hidden_size)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:55.526081600Z",
"start_time": "2024-04-04T06:37:55.345104500Z"
}
},
"id": "813bf1fe055f7d6e"
},
{
"cell_type": "code",
"execution_count": 29,
"outputs": [],
"source": [
"# Loss function and optimizer\n",
"criterion = nn.BCELoss()\n",
"optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:55.526081600Z",
"start_time": "2024-04-04T06:37:55.360079200Z"
}
},
"id": "2a7ed7691c54d12b"
},
{
"cell_type": "markdown",
"source": [
"#### Trenowanie modelu"
],
"metadata": {
"collapsed": false
},
"id": "c3100fb3ca7a4978"
},
{
"cell_type": "code",
"execution_count": 30,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch [100/1000], Loss: 0.08850393444299698\n",
"Epoch [200/1000], Loss: 0.039436809718608856\n",
"Epoch [300/1000], Loss: 0.031037550419569016\n",
"Epoch [400/1000], Loss: 0.026670493185520172\n",
"Epoch [500/1000], Loss: 0.023590415716171265\n",
"Epoch [600/1000], Loss: 0.02146584913134575\n",
"Epoch [700/1000], Loss: 0.019706938415765762\n",
"Epoch [800/1000], Loss: 0.018304765224456787\n",
"Epoch [900/1000], Loss: 0.017177913337945938\n",
"Epoch [1000/1000], Loss: 0.016160517930984497\n"
]
}
],
"source": [
"# Training loop\n",
"for epoch in range(num_epochs):\n",
" # Zero the gradients\n",
" optimizer.zero_grad()\n",
"\n",
" # Forward pass\n",
" outputs = model(X_train)\n",
"\n",
" # Compute loss\n",
" loss = criterion(outputs, y_train)\n",
"\n",
" # Backward pass\n",
" loss.backward()\n",
"\n",
" # Update weights\n",
" optimizer.step()\n",
"\n",
" # Print loss\n",
" if (epoch + 1) % 100 == 0:\n",
" print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:58.081587800Z",
"start_time": "2024-04-04T06:37:55.376710100Z"
}
},
"id": "ab1005e4ca0e57ae"
},
{
"cell_type": "markdown",
"source": [
"#### Ewaluacja modelu"
],
"metadata": {
"collapsed": false
},
"id": "90ae4e97a49d8140"
},
{
"cell_type": "code",
"execution_count": 31,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" B 0.97 1.00 0.99 34\n",
" M 1.00 0.96 0.98 23\n",
"\n",
" accuracy 0.98 57\n",
" macro avg 0.99 0.98 0.98 57\n",
"weighted avg 0.98 0.98 0.98 57\n"
]
}
],
"source": [
"# Test the model\n",
"with torch.no_grad():\n",
" y_pred = model(X_test)\n",
" y_pred = np.where(y_pred > 0.5, 1, 0)\n",
" print(classification_report(y_test, y_pred, target_names=['B', 'M'])) "
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:37:58.167163100Z",
"start_time": "2024-04-04T06:37:58.080578600Z"
}
},
"id": "1e8bb7d1a0f4d572"
},
{
"cell_type": "markdown",
"source": [
"#### Zapisywanie modelu do pliku"
],
"metadata": {
"collapsed": false
},
"id": "2491b52c3a6f1d39"
},
{
"cell_type": "code",
"execution_count": 34,
"outputs": [],
"source": [
"# If directory models does not exist, create it\n",
"if not os.path.exists('./models'):\n",
" os.makedirs('./models')\n",
"\n",
"# Save the model\n",
"torch.save(model, './models/model.pth')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-04T06:49:31.738772700Z",
"start_time": "2024-04-04T06:49:31.718838500Z"
}
},
"id": "b7f509e40380b9c5"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}