868 KiB
868 KiB
# --- Colab setup: install dependencies and fetch the dataset via Kaggle ---
!pip install -q --upgrade selectivesearch torch_snippets
from torch_snippets import *
import selectivesearch
from google.colab import files
files.upload() # upload kaggle.json file which you can get
# by clicking on Create New API token in your personal account
# Place the Kaggle API credentials where the kaggle CLI expects them.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!ls ~/.kaggle
# The kaggle CLI refuses world-readable credentials; restrict to mode 600.
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets download -d sixhky/open-images-bus-trucks/
!unzip -qq open-images-bus-trucks.zip
from torchvision import transforms, models, datasets
from torch_snippets import Report
from torchvision.ops import nms
# Use the GPU when available; all tensors/models are moved to this device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
[K |████████████████████████████████| 36.7MB 77kB/s [K |████████████████████████████████| 61kB 9.2MB/s [K |████████████████████████████████| 102kB 13.6MB/s [?25h Building wheel for selectivesearch (setup.py) ... [?25l[?25hdone Building wheel for contextvars (setup.py) ... [?25l[?25hdone
Upload widget is only available when the cell has been executed in the
current browser session. Please rerun this cell to enable.
Saving kaggle.json to kaggle.json kaggle.json Downloading open-images-bus-trucks.zip to /content 94% 344M/367M [00:02<00:00, 111MB/s] 100% 367M/367M [00:03<00:00, 128MB/s]
# Root directory of the extracted images, and the ground-truth CSV
# (one row per object: ImageID, LabelName, normalized XMin/YMin/XMax/YMax, ...).
IMAGE_ROOT = 'images/images'
DF_RAW = pd.read_csv('df.csv')
print(DF_RAW.head())
ImageID Source LabelName ... XClick2Y XClick3Y XClick4Y 0 0000599864fd15b3 xclick Bus ... 0.512700 0.650047 0.457197 1 00006bdb1eb5cd74 xclick Truck ... 0.241855 0.352130 0.437343 2 00006bdb1eb5cd74 xclick Truck ... 0.398496 0.409774 0.295739 3 00010bf498b64bab xclick Bus ... 0.493882 0.705228 0.521691 4 00013f14dd4e168f xclick Bus ... 0.303940 0.999062 0.523452 [5 rows x 21 columns]
class OpenImages(Dataset):
    """Dataset over the Open Images bus/truck annotations.

    Each item is one image together with all of its ground-truth boxes
    (converted to pixel coordinates) and their class labels.
    """
    def __init__(self, df, image_folder=IMAGE_ROOT):
        """
        df: annotation DataFrame with columns ImageID, LabelName and
            normalized XMin/YMin/XMax/YMax in [0, 1].
        image_folder: directory containing '<ImageID>.jpg' files.
        """
        self.root = image_folder
        self.df = df
        self.unique_images = df['ImageID'].unique()
    def __len__(self): return len(self.unique_images)
    def __getitem__(self, ix):
        image_id = self.unique_images[ix]
        image_path = f'{self.root}/{image_id}.jpg'
        image = cv2.imread(image_path, 1)[...,::-1] # convert BGR to RGB
        h, w, _ = image.shape
        # Boolean indexing already returns a new frame; the full-table
        # self.df.copy() the original made on every item was wasted work.
        df = self.df[self.df['ImageID'] == image_id]
        boxes = df['XMin,YMin,XMax,YMax'.split(',')].values
        # Scale normalized coords to pixels; uint16 suffices because image
        # dimensions are far below 65536.
        boxes = (boxes * np.array([w,h,w,h])).astype(np.uint16).tolist()
        classes = df['LabelName'].values.tolist()
        return image, boxes, classes, image_path
# Sanity check: visualize one sample with its ground-truth boxes and labels.
ds = OpenImages(df=DF_RAW)
im, bbs, clss, _ = ds[9]
show(im, bbs=bbs, texts=clss, sz=10)
def extract_candidates(img):
    """Run selective search on `img` and return filtered region proposals.

    Returns a list of [x, y, w, h] candidate boxes whose region size is
    between 5% and 100% of the image area, with duplicate rects removed.
    """
    img_lbl, regions = selectivesearch.selective_search(img, scale=200, min_size=100)
    img_area = np.prod(img.shape[:2])
    candidates = []
    seen = set()
    for r in regions:
        rect = r['rect']
        # BUG FIX: the original tested `r['rect'] in candidates`, comparing
        # a tuple against a list of lists — it never matched, so duplicates
        # were never skipped. Track seen rects as tuples in a set instead.
        if rect in seen: continue
        if r['size'] < (0.05 * img_area): continue
        if r['size'] > (1 * img_area): continue
        seen.add(rect)
        candidates.append(list(rect))
    return candidates
def extract_iou(boxA, boxB, epsilon=1e-5):
    """Intersection-over-union of two [x1, y1, x2, y2] boxes.

    Returns 0.0 when the boxes do not overlap; `epsilon` guards the
    division for degenerate (zero-area) boxes.
    """
    ix1, iy1 = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    ix2, iy2 = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
    inter_w, inter_h = ix2 - ix1, iy2 - iy1
    if inter_w < 0 or inter_h < 0:
        return 0.0
    intersection = inter_w * inter_h
    union = ((boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
             + (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
             - intersection)
    return intersection / (union + epsilon)
# Build training targets for the first N images: for each selective-search
# candidate, record the best-matching ground-truth box, its class (or
# 'background' when the best IoU is <= 0.3) and normalized regression deltas.
FPATHS, GTBBS, CLSS, DELTAS, ROIS, IOUS = [], [], [], [], [], []
N = 500
for ix, (im, bbs, labels, fpath) in enumerate(ds):
    if(ix==N):
        break
    H, W, _ = im.shape
    candidates = extract_candidates(im)
    # (x, y, w, h) -> (x1, y1, x2, y2)
    candidates = np.array([(x,y,x+w,y+h) for x,y,w,h in candidates])
    ious, rois, clss, deltas = [], [], [], []
    # After the transpose, ious[jx, k] = IoU of candidate jx with GT box k.
    ious = np.array([[extract_iou(candidate, _bb_) for candidate in candidates] for _bb_ in bbs]).T
    for jx, candidate in enumerate(candidates):
        cx,cy,cX,cY = candidate
        candidate_ious = ious[jx]
        best_iou_at = np.argmax(candidate_ious)
        best_iou = candidate_ious[best_iou_at]
        best_bb = _x,_y,_X,_Y = bbs[best_iou_at]
        # Candidates overlapping a GT box by more than 0.3 inherit its
        # label; everything else is treated as background.
        if best_iou > 0.3: clss.append(labels[best_iou_at])
        else : clss.append('background')
        # Corner offsets candidate -> GT, normalized by image width/height.
        delta = np.array([_x-cx, _y-cy, _X-cX, _Y-cY]) / np.array([W,H,W,H])
        deltas.append(delta)
        # ROIs are stored in [0, 1] coordinates as well.
        rois.append(candidate / np.array([W,H,W,H]))
    FPATHS.append(fpath)
    IOUS.append(ious)
    ROIS.append(rois)
    CLSS.append(clss)
    DELTAS.append(deltas)
    GTBBS.append(bbs)
# Normalize stored paths to '<IMAGE_ROOT>/<stem>.jpg'.
FPATHS = [f'{IMAGE_ROOT}/{stem(f)}.jpg' for f in FPATHS]
# (The original re-assigned FPATHS, GTBBS, CLSS, DELTAS, ROIS through an
# identity list comprehension here — a no-op, removed.)
# Map class labels to integer targets and back; 'background' gets its own id.
targets = pd.DataFrame(flatten(CLSS), columns=['label'])
label2target = {l:t for t,l in enumerate(targets['label'].unique())}
target2label = {t:l for l,t in label2target.items()}
background_class = label2target['background']
# ImageNet channel statistics used to normalize inputs for the VGG backbone.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
def preprocess_image(img):
    """HWC float image -> normalized CHW float tensor on `device`."""
    tensor = torch.tensor(img).permute(2, 0, 1)
    return normalize(tensor).to(device).float()
def decode(_y):
    """Return predicted class ids: index of the max score on the last dim."""
    return _y.max(-1).indices
class FRCNNDataset(Dataset):
    """Pairs each image file with its precomputed ROIs, labels and deltas."""
    def __init__(self, fpaths, rois, labels, deltas, gtbbs):
        self.fpaths = fpaths
        self.gtbbs = gtbbs
        self.rois = rois
        self.labels = labels
        self.deltas = deltas
    def __len__(self): return len(self.fpaths)
    def __getitem__(self, ix):
        fpath = str(self.fpaths[ix])
        image = cv2.imread(fpath, 1)[...,::-1]  # BGR -> RGB
        gtbbs, rois = self.gtbbs[ix], self.rois[ix]
        labels, deltas = self.labels[ix], self.deltas[ix]
        assert len(rois) == len(labels) == len(deltas), f'{len(rois)}, {len(labels)}, {len(deltas)}'
        return image, rois, labels, deltas, gtbbs, fpath
    def collate_fn(self, batch):
        """Resize every image to 224x224 and flatten the per-image ROI
        lists, tagging each ROI with its image's index in the batch."""
        input, rois, rixs, labels, deltas = [], [], [], [], []
        for ix, sample in enumerate(batch):
            image, image_rois, image_labels, image_deltas, image_gt_bbs, image_fpath = sample
            resized = cv2.resize(image, (224,224))
            input.append(preprocess_image(resized/255.)[None])
            rois.extend(image_rois)
            rixs.extend([ix] * len(image_rois))
            labels.extend(label2target[c] for c in image_labels)
            deltas.extend(image_deltas)
        input = torch.cat(input).to(device)
        rois = torch.Tensor(rois).float().to(device)
        rixs = torch.Tensor(rixs).float().to(device)
        labels = torch.Tensor(labels).long().to(device)
        deltas = torch.Tensor(deltas).float().to(device)
        return input, rois, rixs, labels, deltas
# 90/10 train/validation split over the processed images.
n_train = 9*len(FPATHS)//10
train_ds = FRCNNDataset(FPATHS[:n_train], ROIS[:n_train], CLSS[:n_train], DELTAS[:n_train], GTBBS[:n_train])
test_ds = FRCNNDataset(FPATHS[n_train:], ROIS[n_train:], CLSS[n_train:], DELTAS[n_train:], GTBBS[n_train:])
from torch.utils.data import TensorDataset, DataLoader
# The custom collate_fn flattens variable-length per-image ROI lists.
train_loader = DataLoader(train_ds, batch_size=2, collate_fn=train_ds.collate_fn, drop_last=True)
test_loader = DataLoader(test_ds, batch_size=2, collate_fn=test_ds.collate_fn, drop_last=True)
from torchvision.ops import RoIPool
class FRCNN(nn.Module):
    """Fast R-CNN head on a VGG16-BN backbone.

    One backbone forward pass per image; each ROI is pooled to a fixed
    7x7 feature map and then fed to a class-score head and a
    box-offset head.
    """
    def __init__(self):
        super().__init__()
        rawnet = torchvision.models.vgg16_bn(pretrained=True)
        # NOTE(review): requires_grad is already True on a freshly loaded
        # model, so this loop is a no-op; it reads as if freezing (False)
        # may have been intended — confirm.
        for param in rawnet.features.parameters():
            param.requires_grad = True
        # Drop the final max-pool so a 224x224 input yields a 14x14 map.
        self.seq = nn.Sequential(*list(rawnet.features.children())[:-1])
        # spatial_scale maps 224-pixel ROI coords onto the 14x14 features.
        self.roipool = RoIPool(7, spatial_scale=14/224)
        feature_dim = 512*7*7
        self.cls_score = nn.Linear(feature_dim, len(label2target))
        # Regression head; Tanh bounds predicted deltas to (-1, 1).
        self.bbox = nn.Sequential(
              nn.Linear(feature_dim, 512),
              nn.ReLU(),
              nn.Linear(512, 4),
              nn.Tanh(),
            )
        self.cel = nn.CrossEntropyLoss()
        # NOTE(review): named 'sl1' but this is plain L1, not
        # nn.SmoothL1Loss as in the Fast R-CNN paper — confirm intent.
        self.sl1 = nn.L1Loss()
    def forward(self, input, rois, ridx):
        res = input
        res = self.seq(res)
        # RoIPool expects (batch_index, x1, y1, x2, y2) in input-pixel
        # coordinates; rois arrive normalized to [0, 1], hence the *224.
        rois = torch.cat([ridx.unsqueeze(-1), rois*224], dim=-1)
        res = self.roipool(res, rois)
        feat = res.view(len(res), -1)
        cls_score = self.cls_score(feat)
        bbox = self.bbox(feat) # .view(-1, len(label2target), 4)
        return cls_score, bbox
    def calc_loss(self, probs, _deltas, labels, deltas):
        """Cross-entropy over all ROIs plus lambda-weighted regression
        loss over the non-background ROIs only."""
        detection_loss = self.cel(probs, labels)
        # Indices of foreground (non-background) ROIs.
        ixs, = torch.where(labels != background_class)
        _deltas = _deltas[ixs]
        deltas = deltas[ixs]
        self.lmb = 10.0
        if len(ixs) > 0:
            regression_loss = self.sl1(_deltas, deltas)
            return detection_loss + self.lmb * regression_loss, detection_loss.detach(), regression_loss.detach()
        else:
            # No foreground ROIs in this batch: regression term is zero.
            regression_loss = 0
            return detection_loss + self.lmb * regression_loss, detection_loss.detach(), regression_loss
def train_batch(inputs, model, optimizer, criterion):
    """Run one optimization step on a collated batch.

    Returns (total_loss, detection_loss, regression_loss, per-ROI accuracy
    array on the CPU).
    """
    input, rois, rixs, clss, deltas = inputs
    model.train()
    optimizer.zero_grad()
    pred_clss, pred_deltas = model(input, rois, rixs)
    loss, loc_loss, regr_loss = criterion(pred_clss, pred_deltas, clss, deltas)
    accs = clss == decode(pred_clss)
    loss.backward()
    optimizer.step()
    return loss.detach(), loc_loss, regr_loss, accs.cpu().numpy()
def validate_batch(inputs, model, criterion):
    """Evaluate one collated batch without gradient tracking.

    Returns (predicted classes, predicted deltas, total loss,
    detection loss, regression loss, per-ROI accuracy array).
    """
    input, rois, rixs, clss, deltas = inputs
    model.eval()
    with torch.no_grad():
        pred_clss, pred_deltas = model(input, rois, rixs)
        loss, loc_loss, regr_loss = criterion(pred_clss, pred_deltas, clss, deltas)
        preds = decode(pred_clss)
    accs = clss == preds
    return preds, pred_deltas, loss.detach(), loc_loss, regr_loss, accs.cpu().numpy()
# Instantiate the model, use its combined loss as the criterion, train
# with plain SGD for a few epochs while logging to a torch_snippets Report.
frcnn = FRCNN().to(device)
criterion = frcnn.calc_loss
optimizer = optim.SGD(frcnn.parameters(), lr=1e-3)
n_epochs = 5
log = Report(n_epochs)
for epoch in range(n_epochs):
    # --- training pass ---
    _n = len(train_loader)
    for ix, inputs in enumerate(train_loader):
        loss, loc_loss, regr_loss, accs = train_batch(inputs, frcnn,
                                                      optimizer, criterion)
        # Fractional epoch position for the Report logger.
        pos = (epoch + (ix+1)/_n)
        log.record(pos, trn_loss=loss.item(), trn_loc_loss=loc_loss,
                   trn_regr_loss=regr_loss,
                   trn_acc=accs.mean(), end='\r')
    # --- validation pass ---
    _n = len(test_loader)
    for ix,inputs in enumerate(test_loader):
        _clss, _deltas, loss, \
        loc_loss, regr_loss, accs = validate_batch(inputs,
                                                   frcnn, criterion)
        pos = (epoch + (ix+1)/_n)
        log.record(pos, val_loss=loss.item(), val_loc_loss=loc_loss,
                   val_regr_loss=regr_loss,
                   val_acc=accs.mean(), end='\r')
    log.report_avgs(epoch+1)
# Plotting training and validation metrics
log.plot_epochs('trn_loss,val_loss'.split(','))
Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth
HBox(children=(FloatProgress(value=0.0, max=553507836.0), HTML(value='')))
EPOCH: 4.760 val_loss: 1.340 val_loc_loss: 0.679 val_regr_loss: 0.066 val_acc: 0.765 (64.17s - 3.24s remaining)
0%| | 0/5 [00:00<?, ?it/s]/usr/local/lib/python3.6/dist-packages/numpy/core/fromnumeric.py:3335: RuntimeWarning: Mean of empty slice. out=out, **kwargs) /usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py:161: RuntimeWarning: invalid value encountered in double_scalars ret = ret.dtype.type(ret / rcount) 100%|██████████| 5/5 [00:00<00:00, 739.32it/s]
EPOCH: 4.800 val_loss: 0.984 val_loc_loss: 0.369 val_regr_loss: 0.061 val_acc: 0.854 (64.19s - 2.67s remaining) EPOCH: 4.840 val_loss: 1.032 val_loc_loss: 0.419 val_regr_loss: 0.061 val_acc: 0.779 (64.21s - 2.12s remaining) EPOCH: 4.880 val_loss: 1.138 val_loc_loss: 0.489 val_regr_loss: 0.065 val_acc: 0.691 (64.24s - 1.58s remaining) EPOCH: 4.920 val_loss: 1.693 val_loc_loss: 0.811 val_regr_loss: 0.088 val_acc: 0.609 (64.26s - 1.04s remaining) EPOCH: 4.960 val_loss: 1.475 val_loc_loss: 0.567 val_regr_loss: 0.091 val_acc: 0.824 (64.28s - 0.52s remaining) EPOCH: 5.000 val_loss: 1.979 val_loc_loss: 0.967 val_regr_loss: 0.101 val_acc: 0.455 (64.30s - 0.00s remaining)
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.patches as mpatches
from torchvision.ops import nms
from PIL import Image
def test_predictions(filename):
    """Run the trained FRCNN on one image file and plot its detections
    next to the original image."""
    img = cv2.resize(np.array(Image.open(filename)), (224,224))
    candidates = extract_candidates(img)
    # (x, y, w, h) -> (x1, y1, x2, y2)
    candidates = [(x,y,x+w,y+h) for x,y,w,h in candidates]
    input = preprocess_image(img/255.)[None]
    # ROIs normalized to [0, 1]; all belong to batch image 0.
    rois = [[x/224,y/224,X/224,Y/224] for x,y,X,Y in candidates]
    rixs = np.array([0]*len(rois))
    rois, rixs = [torch.Tensor(item).to(device) for item in [rois, rixs]]
    with torch.no_grad():
        frcnn.eval()
        probs, deltas = frcnn(input, rois, rixs)
        confs, clss = torch.max(probs, -1)
    candidates = np.array(candidates)
    confs, clss, probs, deltas = [tensor.detach().cpu().numpy() for tensor in [confs, clss, probs, deltas]]
    # Keep only non-background predictions.
    ixs = clss!=background_class
    confs, clss, probs, deltas, candidates = [tensor[ixs] for tensor in [confs, clss, probs, deltas, candidates]]
    # NOTE(review): training deltas were normalized by image size, yet here
    # they are added to pixel-space candidates without rescaling — confirm
    # this is the intended behavior.
    bbs = candidates + deltas
    # Non-max suppression with a very strict 0.05 IoU threshold.
    ixs = nms(torch.tensor(bbs.astype(np.float32)), torch.tensor(confs), 0.05)
    confs, clss, probs, deltas, candidates, bbs = [tensor[ixs] for tensor in [confs, clss, probs, deltas, candidates, bbs]]
    if len(ixs) == 1:
        # NOTE(review): intended to restore a batch dimension when only one
        # box survives NMS; indexing with a length-1 tensor should already
        # keep the dimension — verify this branch is ever needed.
        confs, clss, probs, deltas, candidates, bbs = [tensor[None] for tensor in [confs, clss, probs, deltas, candidates, bbs]]
    bbs = bbs.astype(np.uint16)
    _, ax = plt.subplots(1, 2, figsize=(20,10))
    show(img, ax=ax[0])
    ax[0].grid(False)
    ax[0].set_title(filename.split('/')[-1])
    if len(confs) == 0:
        # Nothing detected: show the bare image on the right-hand panel.
        ax[1].imshow(img)
        ax[1].set_title('No objects')
        plt.show()
        return
    else:
        show(img, bbs=bbs.tolist(), texts=[target2label[c] for c in clss.tolist()], ax=ax[1])
        plt.show()
# Visualize predictions for one held-out test image.
test_predictions(test_ds[29][-1])