ium_444421/classification.ipynb
2022-04-21 20:14:08 +02:00

34 KiB
Raw Blame History

!kaggle datasets download -d yasserh/breast-cancer-dataset
!unzip -o breast-cancer-dataset.zip
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import torch.nn.functional as F
import pandas as pd
class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim,50)
        self.layer2 = nn.Linear(50, 20)
        self.layer3 = nn.Linear(20, 3)
        
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x)) # To check with the loss function
        return x
data = pd.read_csv('breast-cancer.csv', index_col=0)
data[1:10]
diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave points_mean symmetry_mean ... radius_worst texture_worst perimeter_worst area_worst smoothness_worst compactness_worst concavity_worst concave points_worst symmetry_worst fractal_dimension_worst
id
842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.08690 0.07017 0.1812 ... 24.99 23.41 158.80 1956.0 0.1238 0.1866 0.2416 0.1860 0.2750 0.08902
84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.19740 0.12790 0.2069 ... 23.57 25.53 152.50 1709.0 0.1444 0.4245 0.4504 0.2430 0.3613 0.08758
84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.24140 0.10520 0.2597 ... 14.91 26.50 98.87 567.7 0.2098 0.8663 0.6869 0.2575 0.6638 0.17300
84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.19800 0.10430 0.1809 ... 22.54 16.67 152.20 1575.0 0.1374 0.2050 0.4000 0.1625 0.2364 0.07678
843786 M 12.45 15.70 82.57 477.1 0.12780 0.17000 0.15780 0.08089 0.2087 ... 15.47 23.75 103.40 741.6 0.1791 0.5249 0.5355 0.1741 0.3985 0.12440
844359 M 18.25 19.98 119.60 1040.0 0.09463 0.10900 0.11270 0.07400 0.1794 ... 22.88 27.66 153.20 1606.0 0.1442 0.2576 0.3784 0.1932 0.3063 0.08368
84458202 M 13.71 20.83 90.20 577.9 0.11890 0.16450 0.09366 0.05985 0.2196 ... 17.06 28.14 110.60 897.0 0.1654 0.3682 0.2678 0.1556 0.3196 0.11510
844981 M 13.00 21.82 87.50 519.8 0.12730 0.19320 0.18590 0.09353 0.2350 ... 15.49 30.73 106.20 739.3 0.1703 0.5401 0.5390 0.2060 0.4378 0.10720
84501001 M 12.46 24.04 83.97 475.9 0.11860 0.23960 0.22730 0.08543 0.2030 ... 15.09 40.68 97.65 711.4 0.1853 1.0580 1.1050 0.2210 0.4366 0.20750

9 rows × 31 columns

lb = LabelEncoder()
data['diagnosis'] = lb.fit_transform(data['diagnosis'])
features = data.iloc[:, 1:32].values
labels = np.array(data['diagnosis'])
features[1:10]
array([[2.057e+01, 1.777e+01, 1.329e+02, 1.326e+03, 8.474e-02, 7.864e-02,
        8.690e-02, 7.017e-02, 1.812e-01, 5.667e-02, 5.435e-01, 7.339e-01,
        3.398e+00, 7.408e+01, 5.225e-03, 1.308e-02, 1.860e-02, 1.340e-02,
        1.389e-02, 3.532e-03, 2.499e+01, 2.341e+01, 1.588e+02, 1.956e+03,
        1.238e-01, 1.866e-01, 2.416e-01, 1.860e-01, 2.750e-01, 8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, 1.203e+03, 1.096e-01, 1.599e-01,
        1.974e-01, 1.279e-01, 2.069e-01, 5.999e-02, 7.456e-01, 7.869e-01,
        4.585e+00, 9.403e+01, 6.150e-03, 4.006e-02, 3.832e-02, 2.058e-02,
        2.250e-02, 4.571e-03, 2.357e+01, 2.553e+01, 1.525e+02, 1.709e+03,
        1.444e-01, 4.245e-01, 4.504e-01, 2.430e-01, 3.613e-01, 8.758e-02],
       [1.142e+01, 2.038e+01, 7.758e+01, 3.861e+02, 1.425e-01, 2.839e-01,
        2.414e-01, 1.052e-01, 2.597e-01, 9.744e-02, 4.956e-01, 1.156e+00,
        3.445e+00, 2.723e+01, 9.110e-03, 7.458e-02, 5.661e-02, 1.867e-02,
        5.963e-02, 9.208e-03, 1.491e+01, 2.650e+01, 9.887e+01, 5.677e+02,
        2.098e-01, 8.663e-01, 6.869e-01, 2.575e-01, 6.638e-01, 1.730e-01],
       [2.029e+01, 1.434e+01, 1.351e+02, 1.297e+03, 1.003e-01, 1.328e-01,
        1.980e-01, 1.043e-01, 1.809e-01, 5.883e-02, 7.572e-01, 7.813e-01,
        5.438e+00, 9.444e+01, 1.149e-02, 2.461e-02, 5.688e-02, 1.885e-02,
        1.756e-02, 5.115e-03, 2.254e+01, 1.667e+01, 1.522e+02, 1.575e+03,
        1.374e-01, 2.050e-01, 4.000e-01, 1.625e-01, 2.364e-01, 7.678e-02],
       [1.245e+01, 1.570e+01, 8.257e+01, 4.771e+02, 1.278e-01, 1.700e-01,
        1.578e-01, 8.089e-02, 2.087e-01, 7.613e-02, 3.345e-01, 8.902e-01,
        2.217e+00, 2.719e+01, 7.510e-03, 3.345e-02, 3.672e-02, 1.137e-02,
        2.165e-02, 5.082e-03, 1.547e+01, 2.375e+01, 1.034e+02, 7.416e+02,
        1.791e-01, 5.249e-01, 5.355e-01, 1.741e-01, 3.985e-01, 1.244e-01],
       [1.825e+01, 1.998e+01, 1.196e+02, 1.040e+03, 9.463e-02, 1.090e-01,
        1.127e-01, 7.400e-02, 1.794e-01, 5.742e-02, 4.467e-01, 7.732e-01,
        3.180e+00, 5.391e+01, 4.314e-03, 1.382e-02, 2.254e-02, 1.039e-02,
        1.369e-02, 2.179e-03, 2.288e+01, 2.766e+01, 1.532e+02, 1.606e+03,
        1.442e-01, 2.576e-01, 3.784e-01, 1.932e-01, 3.063e-01, 8.368e-02],
       [1.371e+01, 2.083e+01, 9.020e+01, 5.779e+02, 1.189e-01, 1.645e-01,
        9.366e-02, 5.985e-02, 2.196e-01, 7.451e-02, 5.835e-01, 1.377e+00,
        3.856e+00, 5.096e+01, 8.805e-03, 3.029e-02, 2.488e-02, 1.448e-02,
        1.486e-02, 5.412e-03, 1.706e+01, 2.814e+01, 1.106e+02, 8.970e+02,
        1.654e-01, 3.682e-01, 2.678e-01, 1.556e-01, 3.196e-01, 1.151e-01],
       [1.300e+01, 2.182e+01, 8.750e+01, 5.198e+02, 1.273e-01, 1.932e-01,
        1.859e-01, 9.353e-02, 2.350e-01, 7.389e-02, 3.063e-01, 1.002e+00,
        2.406e+00, 2.432e+01, 5.731e-03, 3.502e-02, 3.553e-02, 1.226e-02,
        2.143e-02, 3.749e-03, 1.549e+01, 3.073e+01, 1.062e+02, 7.393e+02,
        1.703e-01, 5.401e-01, 5.390e-01, 2.060e-01, 4.378e-01, 1.072e-01],
       [1.246e+01, 2.404e+01, 8.397e+01, 4.759e+02, 1.186e-01, 2.396e-01,
        2.273e-01, 8.543e-02, 2.030e-01, 8.243e-02, 2.976e-01, 1.599e+00,
        2.039e+00, 2.394e+01, 7.149e-03, 7.217e-02, 7.743e-02, 1.432e-02,
        1.789e-02, 1.008e-02, 1.509e+01, 4.068e+01, 9.765e+01, 7.114e+02,
        1.853e-01, 1.058e+00, 1.105e+00, 2.210e-01, 4.366e-01, 2.075e-01]])
labels[1:10]
array([1, 1, 1, 1, 1, 1, 1, 1, 1])
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=42, shuffle=True)
# Training
model = Model(features_train.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()
epochs = 100

def print_(loss):
    print ("The loss calculated: ", loss)
# Not using dataloader
x_train, y_train = Variable(torch.from_numpy(features_train)).float(), Variable(torch.from_numpy(labels_train)).long()
for epoch in range(1, epochs+1):
    print ("Epoch #",epoch)
    y_pred = model(x_train)
    loss = loss_fn(y_pred, y_train)
    print_(loss.item())
    
    # Zero gradients
    optimizer.zero_grad()
    loss.backward() # Gradients
    optimizer.step() # Update
Epoch # 1
The loss calculated:  0.922476053237915
Epoch # 2
The loss calculated:  0.9223369359970093
Epoch # 3
The loss calculated:  0.9223369359970093
Epoch # 4
The loss calculated:  0.9223369359970093
Epoch # 5
The loss calculated:  0.9223369359970093
Epoch # 6
The loss calculated:  0.9223369359970093
Epoch # 7
The loss calculated:  0.9223369359970093
Epoch # 8
The loss calculated:  0.9223369359970093
Epoch # 9
The loss calculated:  0.9223369359970093
Epoch # 10
The loss calculated:  0.9223369359970093
Epoch # 11
The loss calculated:  0.9223369359970093
Epoch # 12
The loss calculated:  0.9223369359970093
Epoch # 13
The loss calculated:  0.9223369359970093
Epoch # 14
The loss calculated:  0.9223369359970093
Epoch # 15
The loss calculated:  0.9223369359970093
Epoch # 16
The loss calculated:  0.9223369359970093
Epoch # 17
The loss calculated:  0.9223369359970093
Epoch # 18
The loss calculated:  0.9223369359970093
Epoch # 19
The loss calculated:  0.9223369359970093
Epoch # 20
The loss calculated:  0.9223369359970093
Epoch # 21
The loss calculated:  0.9223369359970093
Epoch # 22
The loss calculated:  0.9223369359970093
Epoch # 23
The loss calculated:  0.9223369359970093
Epoch # 24
The loss calculated:  0.9223369359970093
Epoch # 25
The loss calculated:  0.9223369359970093
Epoch # 26
The loss calculated:  0.9223369359970093
Epoch # 27
The loss calculated:  0.9223369359970093
Epoch # 28
The loss calculated:  0.9223369359970093
Epoch # 29
The loss calculated:  0.9223369359970093
Epoch # 30
The loss calculated:  0.9223369359970093
Epoch # 31
The loss calculated:  0.9223369359970093
Epoch # 32
The loss calculated:  0.9223369359970093
Epoch # 33
The loss calculated:  0.9223369359970093
Epoch # 34
The loss calculated:  0.9223369359970093
Epoch # 35
The loss calculated:  0.9223369359970093
Epoch # 36
The loss calculated:  0.9223369359970093
Epoch # 37
The loss calculated:  0.9223369359970093
Epoch # 38
The loss calculated:  0.9223369359970093
Epoch # 39
The loss calculated:  0.9223369359970093
Epoch # 40
The loss calculated:  0.9223369359970093
Epoch # 41
The loss calculated:  0.9223369359970093
Epoch # 42
The loss calculated:  0.9223369359970093
Epoch # 43
The loss calculated:  0.9223369359970093
Epoch # 44
The loss calculated:  0.9223369359970093
Epoch # 45
The loss calculated:  0.9223369359970093
Epoch # 46
The loss calculated:  0.9223369359970093
Epoch # 47
The loss calculated:  0.9223369359970093
Epoch # 48
The loss calculated:  0.9223369359970093
Epoch # 49
The loss calculated:  0.9223369359970093
Epoch # 50
The loss calculated:  0.9223369359970093
Epoch # 51
The loss calculated:  0.9223369359970093
Epoch # 52
The loss calculated:  0.9223369359970093
Epoch # 53
The loss calculated:  0.9223369359970093
Epoch # 54
The loss calculated:  0.9223369359970093
Epoch # 55
The loss calculated:  0.9223369359970093
Epoch # 56
The loss calculated:  0.9223369359970093
Epoch # 57
The loss calculated:  0.9223369359970093
Epoch # 58
The loss calculated:  0.9223369359970093
Epoch # 59
The loss calculated:  0.9223369359970093
Epoch # 60
The loss calculated:  0.9223369359970093
Epoch # 61
The loss calculated:  0.9223369359970093
Epoch # 62
The loss calculated:  0.9223369359970093
Epoch # 63
The loss calculated:  0.9223369359970093
Epoch # 64
The loss calculated:  0.9223369359970093
Epoch # 65
The loss calculated:  0.9223369359970093
Epoch # 66
The loss calculated:  0.9223369359970093
Epoch # 67
The loss calculated:  0.9223369359970093
Epoch # 68
The loss calculated:  0.9223369359970093
Epoch # 69
The loss calculated:  0.9223369359970093
Epoch # 70
The loss calculated:  0.9223369359970093
Epoch # 71
The loss calculated:  0.9223369359970093
Epoch # 72
The loss calculated:  0.9223369359970093
Epoch # 73
The loss calculated:  0.9223369359970093
Epoch # 74
The loss calculated:  0.9223369359970093
Epoch # 75
The loss calculated:  0.9223369359970093
Epoch # 76
The loss calculated:  0.9223369359970093
Epoch # 77
The loss calculated:  0.9223369359970093
Epoch # 78
The loss calculated:  0.9223369359970093
Epoch # 79
The loss calculated:  0.9223369359970093
Epoch # 80
The loss calculated:  0.9223369359970093
Epoch # 81
The loss calculated:  0.9223369359970093
Epoch # 82
The loss calculated:  0.9223369359970093
Epoch # 83
The loss calculated:  0.9223369359970093
Epoch # 84
The loss calculated:  0.9223369359970093
Epoch # 85
The loss calculated:  0.9223369359970093
Epoch # 86
The loss calculated:  0.9223369359970093
Epoch # 87
The loss calculated:  0.9223369359970093
Epoch # 88
The loss calculated:  0.9223369359970093
Epoch # 89
The loss calculated:  0.9223369359970093
Epoch # 90
The loss calculated:  0.9223369359970093
Epoch # 91
The loss calculated:  0.9223369359970093
Epoch # 92
The loss calculated:  0.9223369359970093
Epoch # 93
The loss calculated:  0.9223369359970093
Epoch # 94
The loss calculated:  0.9223369359970093
Epoch # 95
The loss calculated:  0.9223369359970093
Epoch # 96
The loss calculated:  0.9223369359970093
Epoch # 97
The loss calculated:  0.9223369359970093
Epoch # 98
The loss calculated:  0.9223369359970093
Epoch # 99
The loss calculated:  0.9223369359970093
Epoch # 100
The loss calculated:  0.9223369359970093
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:11: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
  # This is added back by InteractiveShellApp.init_path()
# Prediction
x_test = Variable(torch.from_numpy(features_test)).float()
pred = model(x_test)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:11: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
  # This is added back by InteractiveShellApp.init_path()
pred = pred.detach().numpy()
pred[1:10]
array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]], dtype=float32)
print ("The accuracy is", accuracy_score(labels_test, np.argmax(pred, axis=1)))
The accuracy is 0.6223776223776224
labels_test[0]
0
torch.save(model, "travel_insurance-pytorch.pkl")
saved_model = torch.load("travel_insurance-pytorch.pkl")
np.argmax(saved_model(x_test[0]).detach().numpy(), axis=0)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:11: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
  # This is added back by InteractiveShellApp.init_path()
0