ium_434766/lab5.ipynb

276 lines
25 KiB
Plaintext
Raw Normal View History

2021-04-17 13:35:20 +02:00
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python385jvsc74a57bd02cef13873963874fd5439bd04a135498d1dd9725d9d90f40de0b76178a8e03b1",
"display_name": "Python 3.8.5 64-bit (conda)"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import torch\n",
"import torch.nn.functional as F\n",
"from torch import nn\n",
"from torch.autograd import Variable\n",
"import torchvision.transforms as transforms\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from sklearn.metrics import accuracy_score\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"\n",
"\n",
"class LogisticRegressionModel(nn.Module):\n",
" def __init__(self, input_dim, output_dim):\n",
" super(LogisticRegressionModel, self).__init__()\n",
" self.linear = nn.Linear(input_dim, output_dim)\n",
" self.sigmoid = nn.Sigmoid()\n",
" def forward(self, x):\n",
" out = self.linear(x)\n",
" return self.sigmoid(out)\n",
"\n",
"\n",
"data_train = pd.read_csv(\"data_train.csv\")\n",
"data_test = pd.read_csv(\"data_test.csv\")\n",
"data_val = pd.read_csv(\"data_val.csv\")\n",
"FEATURES = [ 'age','hypertension','heart_disease','ever_married', 'avg_glucose_level', 'bmi']\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"x_train = data_train[FEATURES].astype(np.float32)\n",
"y_train = data_train['stroke'].astype(np.float32)\n",
"\n",
"x_test = data_test[FEATURES].astype(np.float32)\n",
"y_test = data_test['stroke'].astype(np.float32)\n",
"\n",
"\n",
"\n",
"fTrain = torch.from_numpy(x_train.values)\n",
"tTrain = torch.from_numpy(y_train.values.reshape(2945,1))\n",
"\n",
"fTest= torch.from_numpy(x_test.values)\n",
"tTest = torch.from_numpy(y_test.values)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"\n",
"batch_size = 95\n",
"n_iters = 1000\n",
"num_epochs = int(n_iters / (len(x_train) / batch_size))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"input_dim = 6\n",
"output_dim = 1\n",
"\n",
"model = LogisticRegressionModel(input_dim, output_dim)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"learning_rate = 0.001\n",
"\n",
"criterion = torch.nn.BCELoss(reduction='mean') \n",
"optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"torch.Size([1, 6])\ntorch.Size([1])\n"
]
}
],
"source": [
"print(list(model.parameters())[0].size())\n",
"print(list(model.parameters())[1].size())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Epoch # 0\n",
"4.4554009437561035\n",
"Epoch # 1\n",
"2.887434244155884\n",
"Epoch # 2\n",
"1.4808591604232788\n",
"Epoch # 3\n",
"0.6207292079925537\n",
"Epoch # 4\n",
"0.4031478762626648\n",
"Epoch # 5\n",
"0.34721270203590393\n",
"Epoch # 6\n",
"0.32333147525787354\n",
"Epoch # 7\n",
"0.3105970621109009\n",
"Epoch # 8\n",
"0.30295372009277344\n",
"Epoch # 9\n",
"0.2980167269706726\n",
"Epoch # 10\n",
"0.29466450214385986\n",
"Epoch # 11\n",
"0.29230451583862305\n",
"Epoch # 12\n",
"0.29059702157974243\n",
"Epoch # 13\n",
"0.2893349230289459\n",
"Epoch # 14\n",
"0.2883857190608978\n",
"Epoch # 15\n",
"0.2876618504524231\n",
"Epoch # 16\n",
"0.2871031165122986\n",
"Epoch # 17\n",
"0.28666743636131287\n",
"Epoch # 18\n",
"0.28632479906082153\n",
"Epoch # 19\n",
"0.2860531508922577\n",
"Epoch # 20\n",
"0.28583624958992004\n",
"Epoch # 21\n",
"0.2856619954109192\n",
"Epoch # 22\n",
"0.285521000623703\n",
"Epoch # 23\n",
"0.2854064106941223\n",
"Epoch # 24\n",
"0.2853126525878906\n",
"Epoch # 25\n",
"0.2852354049682617\n",
"Epoch # 26\n",
"0.2851715385913849\n",
"Epoch # 27\n",
"0.28511837124824524\n",
"Epoch # 28\n",
"0.2850736975669861\n",
"Epoch # 29\n",
"0.2850360572338104\n",
"Epoch # 30\n",
"0.28500401973724365\n",
"Epoch # 31\n",
"0.2849765419960022\n",
"X:\\Anaconda2020\\lib\\site-packages\\torch\\autograd\\__init__.py:145: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 10010). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at ..\\c10\\cuda\\CUDAFunctions.cpp:109.)\n",
" Variable._execution_engine.run_backward(\n"
]
}
],
"source": [
"for epoch in range(num_epochs):\n",
" print (\"Epoch #\",epoch)\n",
" model.train()\n",
" optimizer.zero_grad()\n",
" # Forward pass\n",
" y_pred = model(fTrain)\n",
" # Compute Loss\n",
" loss = criterion(y_pred, tTrain)\n",
" print(loss.item())\n",
" # Backward pass\n",
" loss.backward()\n",
" optimizer.step()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"tags": []
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"predicted Y value: tensor([[0.0468],\n [0.0325],\n [0.2577],\n [0.2059],\n [0.1090],\n [0.0229],\n [0.2290],\n [0.0689],\n [0.2476],\n [0.0453],\n [0.0150],\n [0.4080],\n [0.0424],\n [0.0981],\n [0.0221],\n [0.1546],\n [0.1400],\n [0.1768],\n [0.1684],\n [0.0229],\n [0.1836],\n [0.1200],\n [0.0137],\n [0.2316],\n [0.0185],\n [0.0179],\n [0.0108],\n [0.0175],\n [0.0471],\n [0.4576],\n [0.0210],\n [0.0103],\n [0.0616],\n [0.1850],\n [0.4114],\n [0.4264],\n [0.0405],\n [0.0788],\n [0.2405],\n [0.0340],\n [0.4345],\n [0.1758],\n [0.0385],\n [0.0749],\n [0.4349],\n [0.0357],\n [0.0295],\n [0.3939],\n [0.1147],\n [0.3812],\n [0.0659],\n [0.0675],\n [0.0263],\n [0.1398],\n [0.0959],\n [0.0406],\n [0.0531],\n [0.0500],\n [0.4259],\n [0.1086],\n [0.0611],\n [0.0855],\n [0.0473],\n [0.2826],\n [0.1734],\n [0.0560],\n [0.0466],\n [0.0290],\n [0.1903],\n [0.4515],\n [0.0118],\n [0.2158],\n [0.1293],\n [0.2488],\n [0.0424],\n [0.1809],\n [0.0122],\n [0.0796],\n [0.0901],\n [0.0879],\n [0.0457],\n [0.0091],\n [0.0196],\n [0.3310],\n [0.0978],\n [0.0843],\n [0.0684],\n [0.0340],\n [0.0583],\n [0.0670],\n [0.0133],\n [0.1165],\n [0.0145],\n [0.1581],\n [0.1677],\n [0.0353],\n [0.0745],\n [0.0108],\n [0.0492],\n [0.0611],\n [0.2977],\n [0.2820],\n [0.0219],\n [0.0580],\n [0.0122],\n [0.0726],\n [0.3315],\n [0.0201],\n [0.2460],\n [0.0110],\n [0.0322],\n [0.0180],\n [0.0135],\n [0.3176],\n [0.1390],\n [0.0678],\n [0.1596],\n [0.0128],\n [0.0900],\n [0.0117],\n [0.0224],\n [0.0357],\n [0.0103],\n [0.1728],\n [0.0135],\n [0.0992],\n [0.4371],\n [0.4525],\n [0.0278],\n [0.0617],\n [0.2499],\n [0.0129],\n [0.0424],\n [0.0292],\n [0.3903],\n [0.0108],\n [0.0404],\n [0.0344],\n [0.4109],\n [0.3936],\n [0.0603],\n [0.4396],\n [0.1155],\n [0.3594],\n [0.0305],\n [0.0307],\n [0.0226],\n [0.1284],\n [0.0474],\n [0.0959],\n [0.0135],\n [0.0289],\n [0.3705],\n [0.1538],\n [0.4535],\n [0.0355],\n [0.0169],\n [0.1648],\n [0.4217],\n [0.0951],\n [0.0767],\n [0.0475],\n [0.0452],\n [0.1625],\n [0.0896],\n [0.0114],\n [0.0423],\n [0.3971],\n [0.0173],\n [0.0250],\n [0.3579],\n [0.0131],\n [0.0201],\n [0.0149],\n [0.2615],\n [0.1773],\n [0.1204],\n [0.3556],\n [0.2390],\n [0.0098],\n [0.0190],\n [0.3040],\n [0.0115],\n [0.2033],\n [0.1327],\n [0.0180],\n [0.0610],\n [0.2927],\n [0.1182],\n [0.0115],\n [0.4474],\n [0.3513],\n [0.0451],\n [0.4089],\n [0.0375],\n [0.0127],\n [0.0630],\n [0.0428],\n [0.2085],\n [0.0529],\n [0.3436],\n [0.0678],\n [0.0717],\n [0.0799],\n [0.0967],\n [0.1246],\n [0.1086],\n [0.0387],\n [0.1742],\n [0.1582],\n [0.1374],\n [0.4205],\n [0.0534],\n [0.3051],\n
]
}
],
"source": [
"\n",
"y_pred = model(fTest)\n",
"print(\"predicted Y value: \", y_pred.data)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The accuracy is 0.9480651731160896\n"
]
}
],
"source": [
"print (\"The accuracy is\", accuracy_score(tTest, np.argmax(y_pred.detach().numpy(), axis=1)))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"torch.save(model, 'stroke.pkl')"
]
}
]
}