34 KiB
34 KiB
!kaggle datasets download -d yasserh/breast-cancer-dataset
!unzip -o breast-cancer-dataset.zip
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import torch.nn.functional as F
import pandas as pd
class Model(nn.Module):
def __init__(self, input_dim):
super(Model, self).__init__()
self.layer1 = nn.Linear(input_dim,50)
self.layer2 = nn.Linear(50, 20)
self.layer3 = nn.Linear(20, 3)
def forward(self, x):
x = F.relu(self.layer1(x))
x = F.relu(self.layer2(x))
x = F.softmax(self.layer3(x)) # To check with the loss function
return x
data = pd.read_csv('breast-cancer.csv', index_col=0)
data[1:10]
diagnosis | radius_mean | texture_mean | perimeter_mean | area_mean | smoothness_mean | compactness_mean | concavity_mean | concave points_mean | symmetry_mean | ... | radius_worst | texture_worst | perimeter_worst | area_worst | smoothness_worst | compactness_worst | concavity_worst | concave points_worst | symmetry_worst | fractal_dimension_worst | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | |||||||||||||||||||||
842517 | M | 20.57 | 17.77 | 132.90 | 1326.0 | 0.08474 | 0.07864 | 0.08690 | 0.07017 | 0.1812 | ... | 24.99 | 23.41 | 158.80 | 1956.0 | 0.1238 | 0.1866 | 0.2416 | 0.1860 | 0.2750 | 0.08902 |
84300903 | M | 19.69 | 21.25 | 130.00 | 1203.0 | 0.10960 | 0.15990 | 0.19740 | 0.12790 | 0.2069 | ... | 23.57 | 25.53 | 152.50 | 1709.0 | 0.1444 | 0.4245 | 0.4504 | 0.2430 | 0.3613 | 0.08758 |
84348301 | M | 11.42 | 20.38 | 77.58 | 386.1 | 0.14250 | 0.28390 | 0.24140 | 0.10520 | 0.2597 | ... | 14.91 | 26.50 | 98.87 | 567.7 | 0.2098 | 0.8663 | 0.6869 | 0.2575 | 0.6638 | 0.17300 |
84358402 | M | 20.29 | 14.34 | 135.10 | 1297.0 | 0.10030 | 0.13280 | 0.19800 | 0.10430 | 0.1809 | ... | 22.54 | 16.67 | 152.20 | 1575.0 | 0.1374 | 0.2050 | 0.4000 | 0.1625 | 0.2364 | 0.07678 |
843786 | M | 12.45 | 15.70 | 82.57 | 477.1 | 0.12780 | 0.17000 | 0.15780 | 0.08089 | 0.2087 | ... | 15.47 | 23.75 | 103.40 | 741.6 | 0.1791 | 0.5249 | 0.5355 | 0.1741 | 0.3985 | 0.12440 |
844359 | M | 18.25 | 19.98 | 119.60 | 1040.0 | 0.09463 | 0.10900 | 0.11270 | 0.07400 | 0.1794 | ... | 22.88 | 27.66 | 153.20 | 1606.0 | 0.1442 | 0.2576 | 0.3784 | 0.1932 | 0.3063 | 0.08368 |
84458202 | M | 13.71 | 20.83 | 90.20 | 577.9 | 0.11890 | 0.16450 | 0.09366 | 0.05985 | 0.2196 | ... | 17.06 | 28.14 | 110.60 | 897.0 | 0.1654 | 0.3682 | 0.2678 | 0.1556 | 0.3196 | 0.11510 |
844981 | M | 13.00 | 21.82 | 87.50 | 519.8 | 0.12730 | 0.19320 | 0.18590 | 0.09353 | 0.2350 | ... | 15.49 | 30.73 | 106.20 | 739.3 | 0.1703 | 0.5401 | 0.5390 | 0.2060 | 0.4378 | 0.10720 |
84501001 | M | 12.46 | 24.04 | 83.97 | 475.9 | 0.11860 | 0.23960 | 0.22730 | 0.08543 | 0.2030 | ... | 15.09 | 40.68 | 97.65 | 711.4 | 0.1853 | 1.0580 | 1.1050 | 0.2210 | 0.4366 | 0.20750 |
9 rows × 31 columns
lb = LabelEncoder()
data['diagnosis'] = lb.fit_transform(data['diagnosis'])
features = data.iloc[:, 1:32].values
labels = np.array(data['diagnosis'])
features[1:10]
array([[2.057e+01, 1.777e+01, 1.329e+02, 1.326e+03, 8.474e-02, 7.864e-02, 8.690e-02, 7.017e-02, 1.812e-01, 5.667e-02, 5.435e-01, 7.339e-01, 3.398e+00, 7.408e+01, 5.225e-03, 1.308e-02, 1.860e-02, 1.340e-02, 1.389e-02, 3.532e-03, 2.499e+01, 2.341e+01, 1.588e+02, 1.956e+03, 1.238e-01, 1.866e-01, 2.416e-01, 1.860e-01, 2.750e-01, 8.902e-02], [1.969e+01, 2.125e+01, 1.300e+02, 1.203e+03, 1.096e-01, 1.599e-01, 1.974e-01, 1.279e-01, 2.069e-01, 5.999e-02, 7.456e-01, 7.869e-01, 4.585e+00, 9.403e+01, 6.150e-03, 4.006e-02, 3.832e-02, 2.058e-02, 2.250e-02, 4.571e-03, 2.357e+01, 2.553e+01, 1.525e+02, 1.709e+03, 1.444e-01, 4.245e-01, 4.504e-01, 2.430e-01, 3.613e-01, 8.758e-02], [1.142e+01, 2.038e+01, 7.758e+01, 3.861e+02, 1.425e-01, 2.839e-01, 2.414e-01, 1.052e-01, 2.597e-01, 9.744e-02, 4.956e-01, 1.156e+00, 3.445e+00, 2.723e+01, 9.110e-03, 7.458e-02, 5.661e-02, 1.867e-02, 5.963e-02, 9.208e-03, 1.491e+01, 2.650e+01, 9.887e+01, 5.677e+02, 2.098e-01, 8.663e-01, 6.869e-01, 2.575e-01, 6.638e-01, 1.730e-01], [2.029e+01, 1.434e+01, 1.351e+02, 1.297e+03, 1.003e-01, 1.328e-01, 1.980e-01, 1.043e-01, 1.809e-01, 5.883e-02, 7.572e-01, 7.813e-01, 5.438e+00, 9.444e+01, 1.149e-02, 2.461e-02, 5.688e-02, 1.885e-02, 1.756e-02, 5.115e-03, 2.254e+01, 1.667e+01, 1.522e+02, 1.575e+03, 1.374e-01, 2.050e-01, 4.000e-01, 1.625e-01, 2.364e-01, 7.678e-02], [1.245e+01, 1.570e+01, 8.257e+01, 4.771e+02, 1.278e-01, 1.700e-01, 1.578e-01, 8.089e-02, 2.087e-01, 7.613e-02, 3.345e-01, 8.902e-01, 2.217e+00, 2.719e+01, 7.510e-03, 3.345e-02, 3.672e-02, 1.137e-02, 2.165e-02, 5.082e-03, 1.547e+01, 2.375e+01, 1.034e+02, 7.416e+02, 1.791e-01, 5.249e-01, 5.355e-01, 1.741e-01, 3.985e-01, 1.244e-01], [1.825e+01, 1.998e+01, 1.196e+02, 1.040e+03, 9.463e-02, 1.090e-01, 1.127e-01, 7.400e-02, 1.794e-01, 5.742e-02, 4.467e-01, 7.732e-01, 3.180e+00, 5.391e+01, 4.314e-03, 1.382e-02, 2.254e-02, 1.039e-02, 1.369e-02, 2.179e-03, 2.288e+01, 2.766e+01, 1.532e+02, 1.606e+03, 1.442e-01, 2.576e-01, 3.784e-01, 1.932e-01, 3.063e-01, 8.368e-02], [1.371e+01, 2.083e+01, 9.020e+01, 5.779e+02, 1.189e-01, 1.645e-01, 9.366e-02, 5.985e-02, 2.196e-01, 7.451e-02, 5.835e-01, 1.377e+00, 3.856e+00, 5.096e+01, 8.805e-03, 3.029e-02, 2.488e-02, 1.448e-02, 1.486e-02, 5.412e-03, 1.706e+01, 2.814e+01, 1.106e+02, 8.970e+02, 1.654e-01, 3.682e-01, 2.678e-01, 1.556e-01, 3.196e-01, 1.151e-01], [1.300e+01, 2.182e+01, 8.750e+01, 5.198e+02, 1.273e-01, 1.932e-01, 1.859e-01, 9.353e-02, 2.350e-01, 7.389e-02, 3.063e-01, 1.002e+00, 2.406e+00, 2.432e+01, 5.731e-03, 3.502e-02, 3.553e-02, 1.226e-02, 2.143e-02, 3.749e-03, 1.549e+01, 3.073e+01, 1.062e+02, 7.393e+02, 1.703e-01, 5.401e-01, 5.390e-01, 2.060e-01, 4.378e-01, 1.072e-01], [1.246e+01, 2.404e+01, 8.397e+01, 4.759e+02, 1.186e-01, 2.396e-01, 2.273e-01, 8.543e-02, 2.030e-01, 8.243e-02, 2.976e-01, 1.599e+00, 2.039e+00, 2.394e+01, 7.149e-03, 7.217e-02, 7.743e-02, 1.432e-02, 1.789e-02, 1.008e-02, 1.509e+01, 4.068e+01, 9.765e+01, 7.114e+02, 1.853e-01, 1.058e+00, 1.105e+00, 2.210e-01, 4.366e-01, 2.075e-01]])
labels[1:10]
array([1, 1, 1, 1, 1, 1, 1, 1, 1])
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=42, shuffle=True)
# Training
model = Model(features_train.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()
epochs = 100
def print_(loss):
print ("The loss calculated: ", loss)
# Not using dataloader
x_train, y_train = Variable(torch.from_numpy(features_train)).float(), Variable(torch.from_numpy(labels_train)).long()
for epoch in range(1, epochs+1):
print ("Epoch #",epoch)
y_pred = model(x_train)
loss = loss_fn(y_pred, y_train)
print_(loss.item())
# Zero gradients
optimizer.zero_grad()
loss.backward() # Gradients
optimizer.step() # Update
Epoch # 1 The loss calculated: 0.922476053237915 Epoch # 2 The loss calculated: 0.9223369359970093 Epoch # 3 The loss calculated: 0.9223369359970093 Epoch # 4 The loss calculated: 0.9223369359970093 Epoch # 5 The loss calculated: 0.9223369359970093 Epoch # 6 The loss calculated: 0.9223369359970093 Epoch # 7 The loss calculated: 0.9223369359970093 Epoch # 8 The loss calculated: 0.9223369359970093 Epoch # 9 The loss calculated: 0.9223369359970093 Epoch # 10 The loss calculated: 0.9223369359970093 Epoch # 11 The loss calculated: 0.9223369359970093 Epoch # 12 The loss calculated: 0.9223369359970093 Epoch # 13 The loss calculated: 0.9223369359970093 Epoch # 14 The loss calculated: 0.9223369359970093 Epoch # 15 The loss calculated: 0.9223369359970093 Epoch # 16 The loss calculated: 0.9223369359970093 Epoch # 17 The loss calculated: 0.9223369359970093 Epoch # 18 The loss calculated: 0.9223369359970093 Epoch # 19 The loss calculated: 0.9223369359970093 Epoch # 20 The loss calculated: 0.9223369359970093 Epoch # 21 The loss calculated: 0.9223369359970093 Epoch # 22 The loss calculated: 0.9223369359970093 Epoch # 23 The loss calculated: 0.9223369359970093 Epoch # 24 The loss calculated: 0.9223369359970093 Epoch # 25 The loss calculated: 0.9223369359970093 Epoch # 26 The loss calculated: 0.9223369359970093 Epoch # 27 The loss calculated: 0.9223369359970093 Epoch # 28 The loss calculated: 0.9223369359970093 Epoch # 29 The loss calculated: 0.9223369359970093 Epoch # 30 The loss calculated: 0.9223369359970093 Epoch # 31 The loss calculated: 0.9223369359970093 Epoch # 32 The loss calculated: 0.9223369359970093 Epoch # 33 The loss calculated: 0.9223369359970093 Epoch # 34 The loss calculated: 0.9223369359970093 Epoch # 35 The loss calculated: 0.9223369359970093 Epoch # 36 The loss calculated: 0.9223369359970093 Epoch # 37 The loss calculated: 0.9223369359970093 Epoch # 38 The loss calculated: 0.9223369359970093 Epoch # 39 The loss calculated: 0.9223369359970093 Epoch # 40 The loss calculated: 0.9223369359970093 Epoch # 41 The loss calculated: 0.9223369359970093 Epoch # 42 The loss calculated: 0.9223369359970093 Epoch # 43 The loss calculated: 0.9223369359970093 Epoch # 44 The loss calculated: 0.9223369359970093 Epoch # 45 The loss calculated: 0.9223369359970093 Epoch # 46 The loss calculated: 0.9223369359970093 Epoch # 47 The loss calculated: 0.9223369359970093 Epoch # 48 The loss calculated: 0.9223369359970093 Epoch # 49 The loss calculated: 0.9223369359970093 Epoch # 50 The loss calculated: 0.9223369359970093 Epoch # 51 The loss calculated: 0.9223369359970093 Epoch # 52 The loss calculated: 0.9223369359970093 Epoch # 53 The loss calculated: 0.9223369359970093 Epoch # 54 The loss calculated: 0.9223369359970093 Epoch # 55 The loss calculated: 0.9223369359970093 Epoch # 56 The loss calculated: 0.9223369359970093 Epoch # 57 The loss calculated: 0.9223369359970093 Epoch # 58 The loss calculated: 0.9223369359970093 Epoch # 59 The loss calculated: 0.9223369359970093 Epoch # 60 The loss calculated: 0.9223369359970093 Epoch # 61 The loss calculated: 0.9223369359970093 Epoch # 62 The loss calculated: 0.9223369359970093 Epoch # 63 The loss calculated: 0.9223369359970093 Epoch # 64 The loss calculated: 0.9223369359970093 Epoch # 65 The loss calculated: 0.9223369359970093 Epoch # 66 The loss calculated: 0.9223369359970093 Epoch # 67 The loss calculated: 0.9223369359970093 Epoch # 68 The loss calculated: 0.9223369359970093 Epoch # 69 The loss calculated: 0.9223369359970093 Epoch # 70 The loss calculated: 0.9223369359970093 Epoch # 71 The loss calculated: 0.9223369359970093 Epoch # 72 The loss calculated: 0.9223369359970093 Epoch # 73 The loss calculated: 0.9223369359970093 Epoch # 74 The loss calculated: 0.9223369359970093 Epoch # 75 The loss calculated: 0.9223369359970093 Epoch # 76 The loss calculated: 0.9223369359970093 Epoch # 77 The loss calculated: 0.9223369359970093 Epoch # 78 The loss calculated: 0.9223369359970093 Epoch # 79 The loss calculated: 0.9223369359970093 Epoch # 80 The loss calculated: 0.9223369359970093 Epoch # 81 The loss calculated: 0.9223369359970093 Epoch # 82 The loss calculated: 0.9223369359970093 Epoch # 83 The loss calculated: 0.9223369359970093 Epoch # 84 The loss calculated: 0.9223369359970093 Epoch # 85 The loss calculated: 0.9223369359970093 Epoch # 86 The loss calculated: 0.9223369359970093 Epoch # 87 The loss calculated: 0.9223369359970093 Epoch # 88 The loss calculated: 0.9223369359970093 Epoch # 89 The loss calculated: 0.9223369359970093 Epoch # 90 The loss calculated: 0.9223369359970093 Epoch # 91 The loss calculated: 0.9223369359970093 Epoch # 92 The loss calculated: 0.9223369359970093 Epoch # 93 The loss calculated: 0.9223369359970093 Epoch # 94 The loss calculated: 0.9223369359970093 Epoch # 95 The loss calculated: 0.9223369359970093 Epoch # 96 The loss calculated: 0.9223369359970093 Epoch # 97 The loss calculated: 0.9223369359970093 Epoch # 98 The loss calculated: 0.9223369359970093 Epoch # 99 The loss calculated: 0.9223369359970093 Epoch # 100 The loss calculated: 0.9223369359970093
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:11: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument. # This is added back by InteractiveShellApp.init_path()
# Prediction
x_test = Variable(torch.from_numpy(features_test)).float()
pred = model(x_test)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:11: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument. # This is added back by InteractiveShellApp.init_path()
pred = pred.detach().numpy()
pred[1:10]
array([[1., 0., 0.], [1., 0., 0.], [1., 0., 0.], [1., 0., 0.], [1., 0., 0.], [1., 0., 0.], [1., 0., 0.], [1., 0., 0.], [1., 0., 0.]], dtype=float32)
print ("The accuracy is", accuracy_score(labels_test, np.argmax(pred, axis=1)))
The accuracy is 0.6223776223776224
labels_test[0]
0
torch.save(model, "travel_insurance-pytorch.pkl")
saved_model = torch.load("travel_insurance-pytorch.pkl")
np.argmax(saved_model(x_test[0]).detach().numpy(), axis=0)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:11: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument. # This is added back by InteractiveShellApp.init_path()
0