391 lines
9.2 KiB
Plaintext
391 lines
9.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"## IUM_05"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"id": "6198de641534b1bc"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"#### Wymagane zależności"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"id": "6a7ce1eb01ab7917"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import torch\n",
|
|
"import torch.nn as nn\n",
|
|
"import torch.optim as optim\n",
|
|
"\n",
|
|
"import os\n",
|
|
"\n",
|
|
"from sklearn.metrics import classification_report"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:55.332889300Z",
|
|
"start_time": "2024-04-04T06:37:55.254786700Z"
|
|
}
|
|
},
|
|
"id": "2d998e3e2e5e8487"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"#### Model"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"id": "7112915408dc0168"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"outputs": [],
|
|
"source": [
|
"# Neural Network\n",
"class NeuralNetwork(nn.Module):\n",
"    \"\"\"Fully connected binary classifier.\n",
"\n",
"    Three linear layers (input_size -> hidden_size -> hidden_size // 2 -> 1)\n",
"    with ReLU after the first two and a sigmoid on the final output.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, input_size, hidden_size):\n",
"        \"\"\"Build the layers and activation modules.\n",
"\n",
"        Args:\n",
"            input_size: Number of input features per sample.\n",
"            hidden_size: Width of the first hidden layer; the second\n",
"                hidden layer uses half of it (integer division).\n",
"        \"\"\"\n",
"        super().__init__()\n",
"\n",
"        half_hidden = hidden_size // 2\n",
"\n",
"        self.fc1 = nn.Linear(input_size, hidden_size)\n",
"        self.fc2 = nn.Linear(hidden_size, half_hidden)\n",
"        self.fc3 = nn.Linear(half_hidden, 1)\n",
"\n",
"        self.relu = nn.ReLU()\n",
"        self.sigmoid = nn.Sigmoid()\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return the sigmoid-activated network output for ``x``.\"\"\"\n",
"        hidden = self.relu(self.fc1(x))\n",
"        hidden = self.relu(self.fc2(hidden))\n",
"        return self.sigmoid(self.fc3(hidden))"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:55.418810600Z",
|
|
"start_time": "2024-04-04T06:37:55.272635200Z"
|
|
}
|
|
},
|
|
"id": "559708985898a938"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": "<torch._C.Generator at 0x226dc7744f0>"
|
|
},
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Seed for reproducibility\n",
"# The trailing semicolon keeps the returned Generator repr (which embeds a\n",
"# run-specific memory address) out of the stored cell output.\n",
"torch.manual_seed(1234);"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:55.452156200Z",
|
|
"start_time": "2024-04-04T06:37:55.284602100Z"
|
|
}
|
|
},
|
|
"id": "aa19c822de265874"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"#### Wczytywanie danych"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"id": "47ebb4d371c1aa3f"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"outputs": [],
|
|
"source": [
|
"# Load data\n",
"train = pd.read_csv('datasets/train.csv')\n",
"test = pd.read_csv('datasets/test.csv')\n",
"\n",
"\n",
"def _to_tensors(frame):\n",
"    \"\"\"Split a dataset frame into float32 feature and label tensors.\n",
"\n",
"    Args:\n",
"        frame: DataFrame with an 'id' column, a 'diagnosis' label column\n",
"            and the feature columns.\n",
"\n",
"    Returns:\n",
"        Tuple ``(features, labels)``. ``features`` holds every column except\n",
"        'id' and 'diagnosis'; ``labels`` is reshaped to a single column.\n",
"    \"\"\"\n",
"    features = frame.drop(columns=['id', 'diagnosis']).to_numpy()\n",
"    labels = frame['diagnosis'].to_numpy()\n",
"    return (\n",
"        torch.tensor(features, dtype=torch.float32),\n",
"        torch.tensor(labels, dtype=torch.float32).view(-1, 1),\n",
"    )\n",
"\n",
"\n",
"# Split data and convert to PyTorch tensors; one helper for both splits\n",
"# keeps the train and test conversion identical.\n",
"X_train, y_train = _to_tensors(train)\n",
"X_test, y_test = _to_tensors(test)"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:55.523028300Z",
|
|
"start_time": "2024-04-04T06:37:55.302118800Z"
|
|
}
|
|
},
|
|
"id": "3b26282bd5803093"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"#### Parametry modelu"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"id": "5776bfd5b5830e40"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"outputs": [],
|
|
"source": [
|
"# Parameters\n",
"input_size = X_train.shape[1]  # one input unit per feature column\n",
"hidden_size = 128              # width of the first hidden layer\n",
"num_epochs = 1_000             # number of passes of the training loop\n",
"\n",
"# Optimizer settings\n",
"learning_rate = 1e-3\n",
"weight_decay = 1e-3"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:55.525074500Z",
|
|
"start_time": "2024-04-04T06:37:55.330649100Z"
|
|
}
|
|
},
|
|
"id": "997f473b9f1904af"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"outputs": [],
|
|
"source": [
|
"# Model initialization\n",
"# Built from the sizes chosen in the parameters cell.\n",
"model = NeuralNetwork(input_size=input_size, hidden_size=hidden_size)"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:55.526081600Z",
|
|
"start_time": "2024-04-04T06:37:55.345104500Z"
|
|
}
|
|
},
|
|
"id": "813bf1fe055f7d6e"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"outputs": [],
|
|
"source": [
|
"# Loss function and optimizer\n",
"# BCELoss takes the sigmoid outputs of the model directly.\n",
"criterion = nn.BCELoss()\n",
"optimizer = optim.Adam(\n",
"    model.parameters(),\n",
"    lr=learning_rate,\n",
"    weight_decay=weight_decay,\n",
")"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:55.526081600Z",
|
|
"start_time": "2024-04-04T06:37:55.360079200Z"
|
|
}
|
|
},
|
|
"id": "2a7ed7691c54d12b"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"#### Trenowanie modelu"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"id": "c3100fb3ca7a4978"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Epoch [100/1000], Loss: 0.08850393444299698\n",
|
|
"Epoch [200/1000], Loss: 0.039436809718608856\n",
|
|
"Epoch [300/1000], Loss: 0.031037550419569016\n",
|
|
"Epoch [400/1000], Loss: 0.026670493185520172\n",
|
|
"Epoch [500/1000], Loss: 0.023590415716171265\n",
|
|
"Epoch [600/1000], Loss: 0.02146584913134575\n",
|
|
"Epoch [700/1000], Loss: 0.019706938415765762\n",
|
|
"Epoch [800/1000], Loss: 0.018304765224456787\n",
|
|
"Epoch [900/1000], Loss: 0.017177913337945938\n",
|
|
"Epoch [1000/1000], Loss: 0.016160517930984497\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Training loop (full batch: each epoch is one step over all of X_train)\n",
"log_every = 100\n",
"\n",
"for epoch in range(num_epochs):\n",
"    # Forward pass and loss on the whole training set\n",
"    outputs = model(X_train)\n",
"    loss = criterion(outputs, y_train)\n",
"\n",
"    # Clear stale gradients, back-propagate, then update the weights\n",
"    optimizer.zero_grad()\n",
"    loss.backward()\n",
"    optimizer.step()\n",
"\n",
"    # Report progress every `log_every` epochs (1-based epoch number)\n",
"    if (epoch + 1) % log_every == 0:\n",
"        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:58.081587800Z",
|
|
"start_time": "2024-04-04T06:37:55.376710100Z"
|
|
}
|
|
},
|
|
"id": "ab1005e4ca0e57ae"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"#### Ewaluacja modelu"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"id": "90ae4e97a49d8140"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" precision recall f1-score support\n",
|
|
"\n",
|
|
" B 0.97 1.00 0.99 34\n",
|
|
" M 1.00 0.96 0.98 23\n",
|
|
"\n",
|
|
" accuracy 0.98 57\n",
|
|
" macro avg 0.99 0.98 0.98 57\n",
|
|
"weighted avg 0.98 0.98 0.98 57\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Test the model\n",
"# Switch to inference mode; this network has no dropout or batch-norm\n",
"# layers, but the call keeps evaluation correct if such layers are added.\n",
"model.eval()\n",
"with torch.no_grad():\n",
"    probabilities = model(X_test)\n",
"\n",
"# Threshold the sigmoid outputs at 0.5 and pass plain integer arrays to\n",
"# scikit-learn instead of torch tensors.\n",
"y_pred = (probabilities > 0.5).int().numpy()\n",
"y_true = y_test.int().numpy()\n",
"\n",
"# labels pins the class order so 'B'/'M' cannot silently shift if one class\n",
"# is absent. NOTE(review): assumes diagnosis is encoded 0 = B, 1 = M; confirm\n",
"# against the preprocessing that produced datasets/*.csv.\n",
"print(classification_report(y_true, y_pred, labels=[0, 1], target_names=['B', 'M']))"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:37:58.167163100Z",
|
|
"start_time": "2024-04-04T06:37:58.080578600Z"
|
|
}
|
|
},
|
|
"id": "1e8bb7d1a0f4d572"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"#### Zapisywanie modelu do pliku"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"id": "2491b52c3a6f1d39"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"outputs": [],
|
|
"source": [
|
"# Create the models directory if needed (no error when it already exists)\n",
"os.makedirs('./models', exist_ok=True)\n",
"\n",
"# Save the model\n",
"# NOTE(review): this pickles the whole module, so loading it requires the\n",
"# NeuralNetwork class to be importable, and recent PyTorch releases may need\n",
"# torch.load(..., weights_only=False). Saving model.state_dict() is the more\n",
"# portable option if the code that loads this file can be updated too.\n",
"torch.save(model, './models/model.pth')"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-04-04T06:49:31.738772700Z",
|
|
"start_time": "2024-04-04T06:49:31.718838500Z"
|
|
}
|
|
},
|
|
"id": "b7f509e40380b9c5"
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 2
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython2",
|
|
"version": "2.7.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|