ium_434766/lab5.ipynb

211 lines
23 KiB
Plaintext
Raw Normal View History

2021-04-17 13:35:20 +02:00
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2021-05-07 21:30:35 +02:00
"version": "3.8.5"
2021-04-17 13:35:20 +02:00
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python385jvsc74a57bd02cef13873963874fd5439bd04a135498d1dd9725d9d90f40de0b76178a8e03b1",
"display_name": "Python 3.8.5 64-bit (conda)"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 74,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [],
"source": [
"\n",
"import torch\n",
"import torch.nn.functional as F\n",
"from torch import nn\n",
"from torch.autograd import Variable\n",
"import torchvision.transforms as transforms\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from sklearn.metrics import accuracy_score\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"\n",
"\n",
"class LogisticRegressionModel(nn.Module):\n",
" def __init__(self, input_dim, output_dim):\n",
" super(LogisticRegressionModel, self).__init__()\n",
" self.linear = nn.Linear(input_dim, output_dim)\n",
" self.sigmoid = nn.Sigmoid()\n",
" def forward(self, x):\n",
" out = self.linear(x)\n",
" return self.sigmoid(out)\n",
"\n",
2021-05-07 21:30:35 +02:00
"np.set_printoptions(suppress=False)\n",
2021-04-17 13:35:20 +02:00
"data_train = pd.read_csv(\"data_train.csv\")\n",
"data_test = pd.read_csv(\"data_test.csv\")\n",
"data_val = pd.read_csv(\"data_val.csv\")\n",
"FEATURES = [ 'age','hypertension','heart_disease','ever_married', 'avg_glucose_level', 'bmi']\n"
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 75,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [],
"source": [
"x_train = data_train[FEATURES].astype(np.float32)\n",
"y_train = data_train['stroke'].astype(np.float32)\n",
"\n",
"x_test = data_test[FEATURES].astype(np.float32)\n",
"y_test = data_test['stroke'].astype(np.float32)\n",
"\n",
"\n",
"\n",
"fTrain = torch.from_numpy(x_train.values)\n",
"tTrain = torch.from_numpy(y_train.values.reshape(2945,1))\n",
"\n",
"fTest= torch.from_numpy(x_test.values)\n",
"tTest = torch.from_numpy(y_test.values)\n"
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 76,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [],
"source": [
"\n",
2021-05-07 21:30:35 +02:00
"batch_size = 150\n",
2021-04-17 13:35:20 +02:00
"n_iters = 1000\n",
2021-05-07 21:30:35 +02:00
"num_epochs = 10"
2021-04-17 13:35:20 +02:00
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 77,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [],
"source": [
"input_dim = 6\n",
"output_dim = 1\n",
"\n",
"model = LogisticRegressionModel(input_dim, output_dim)\n"
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 78,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [],
"source": [
"learning_rate = 0.001\n",
"\n",
"criterion = torch.nn.BCELoss(reduction='mean') \n",
"optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)"
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 79,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"torch.Size([1, 6])\ntorch.Size([1])\n"
]
}
],
"source": [
"print(list(model.parameters())[0].size())\n",
"print(list(model.parameters())[1].size())"
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 80,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
2021-05-07 21:30:35 +02:00
"Epoch # 0\n0.34391772747039795\nEpoch # 1\n0.3400452435016632\nEpoch # 2\n0.33628249168395996\nEpoch # 3\n0.3326331079006195\nEpoch # 4\n0.3291005790233612\nEpoch # 5\n0.32568827271461487\nEpoch # 6\n0.32239940762519836\nEpoch # 7\n0.3192369043827057\nEpoch # 8\n0.3162035048007965\nEpoch # 9\n0.31330153346061707\n"
2021-04-17 13:35:20 +02:00
]
}
],
"source": [
"for epoch in range(num_epochs):\n",
" print (\"Epoch #\",epoch)\n",
" model.train()\n",
" optimizer.zero_grad()\n",
" # Forward pass\n",
" y_pred = model(fTrain)\n",
" # Compute Loss\n",
" loss = criterion(y_pred, tTrain)\n",
" print(loss.item())\n",
" # Backward pass\n",
" loss.backward()\n",
" optimizer.step()"
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 81,
2021-04-17 13:35:20 +02:00
"metadata": {
"tags": []
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
2021-05-07 21:30:35 +02:00
"predicted Y value: tensor([[0.0089],\n [0.0051],\n [0.1535],\n [0.1008],\n [0.0365],\n [0.0014],\n [0.1275],\n [0.0172],\n [0.1439],\n [0.0088],\n [0.0013],\n [0.3466],\n [0.0078],\n [0.0303],\n [0.0024],\n [0.0607],\n [0.0556],\n [0.0826],\n [0.0765],\n [0.0027],\n [0.0869],\n [0.0424],\n [0.0013],\n [0.1338],\n [0.0017],\n [0.0020],\n [0.0009],\n [0.0014],\n [0.0090],\n [0.4073],\n [0.0026],\n [0.0009],\n [0.0141],\n [0.0897],\n [0.3593],\n [0.3849],\n [0.0073],\n [0.0204],\n [0.1406],\n [0.0053],\n [0.3840],\n [0.0802],\n [0.0068],\n [0.0190],\n [0.3849],\n [0.0034],\n [0.0045],\n [0.3272],\n [0.0397],\n [0.3087],\n [0.0162],\n [0.0159],\n [0.0033],\n [0.0559],\n [0.0238],\n [0.0073],\n [0.0113],\n [0.0102],\n [0.3827],\n [0.0359],\n [0.0138],\n [0.0248],\n [0.0080],\n [0.1858],\n [0.0766],\n [0.0123],\n [0.0077],\n [0.0042],\n [0.0908],\n [0.4172],\n [0.0010],\n [0.1105],\n [0.0463],\n [0.1457],\n [0.0078],\n [0.0821],\n [0.0011],\n [0.0210],\n [0.0273],\n [0.0248],\n [0.0082],\n [0.0007],\n [0.0022],\n [0.2436],\n [0.0297],\n [0.0235],\n [0.0168],\n [0.0053],\n [0.0128],\n [0.0156],\n [0.0009],\n [0.0375],\n [0.0008],\n [0.0645],\n [0.0750],\n [0.0055],\n [0.0185],\n [0.0008],\n [0.0082],\n [0.0138],\n [0.2082],\n [0.1823],\n [0.0027],\n [0.0124],\n [0.0010],\n [0.0187],\n [0.2454],\n [0.0019],\n [0.1413],\n [0.0010],\n [0.0050],\n [0.0020],\n [0.0011],\n [0.2266],\n [0.0545],\n [0.0164],\n [0.0678],\n [0.0012],\n [0.0271],\n [0.0009],\n [0.0029],\n [0.0058],\n [0.0009],\n [0.0762],\n [0.0013],\n [0.0276],\n [0.3940],\n [0.4213],\n [0.0041],\n [0.0144],\n [0.1491],\n [0.0011],\n [0.0077],\n [0.0032],\n [0.3155],\n [0.0009],\n [0.0072],\n [0.0056],\n [0.3580],\n [0.3235],\n [0.0130],\n [0.4032],\n [0.0405],\n [0.2882],\n [0.0045],\n [0.0041],\n [0.0026],\n [0.0354],\n [0.0094],\n [0.0278],\n [0.0011],\n [0.0036],\n [0.2996],\n [0.0652],\n [0.4247],\n [0.0048],\n [0.0016],\n [0.0703],\n [0.3676],\n [0.0231],\n [0.0206],\n [0.0093],\n [0.0087],\n [0.0649],\n [0.0207],\n [0.0010],\n [0.0076],\n [0.3366],\n [0.0015],\n [0.0034],\n [0.2819],\n [0.0007],\n [0.0024],\n [0.0015],\n [0.1623],\n [0.0838],\n [0.0431],\n [0.2744],\n [0.1369],\n [0.0007],\n [0.0022],\n [0.2049],\n [0.0010],\n [0.1057],\n [0.0503],\n [0.0021],\n [0.0136],\n [0.1939],\n [0.0401],\n [0.0010],\n [0.4003],\n [0.2621],\n [0.0087],\n [0.3507],\n [0.0061],\n [0.0012],\n [0.0103],\n [0.0080],\n [0.1068],\n [0.0098],\n [0.2625],\n [0.0162],\n [0.0178],\n [0.0215],\n [0.0283],\n [0.0444],\n [0.0356],\n [0.0037],\n [0.0779],\n [0.0652],\n [0.0521],\n [0.3626],\n [0.0116],\n [0.2099],\n
2021-04-17 13:35:20 +02:00
]
}
],
"source": [
"\n",
"y_pred = model(fTest)\n",
"print(\"predicted Y value: \", y_pred.data)"
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 87,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The accuracy is 0.9480651731160896\n"
]
}
],
"source": [
"print (\"The accuracy is\", accuracy_score(tTest, np.argmax(y_pred.detach().numpy(), axis=1)))"
]
},
{
"cell_type": "code",
2021-05-07 21:30:35 +02:00
"execution_count": 83,
2021-04-17 13:35:20 +02:00
"metadata": {},
"outputs": [],
"source": [
"torch.save(model, 'stroke.pkl')"
]
}
]
}