603 KiB
603 KiB
!pip install -q --upgrade selectivesearch torch_snippets
from torch_snippets import *
import selectivesearch
#from google.colab import files
#files.upload() # upload kaggle.json file which you can get
# by clicking on Create New API token in your personal account
#!mkdir -p ~/.kaggle
#!mv kaggle.json ~/.kaggle/
#!ls ~/.kaggle
#!chmod 600 /root/.kaggle/kaggle.json
#!kaggle datasets download -d sixhky/open-images-bus-trucks/
#!unzip -qq open-images-bus-trucks.zip
from torchvision import transforms, models, datasets
from torch_snippets import Report
from torchvision.ops import nms
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
cpu
# Open Images bus/truck subset: image files plus a CSV of per-box
# annotations with normalised XMin/YMin/XMax/YMax coordinates.
IMAGE_ROOT = 'images/images'
DF_RAW = pd.read_csv('images/df.csv')
print(DF_RAW.head())
ImageID Source LabelName Confidence XMin XMax \ 0 0000599864fd15b3 xclick Bus 1 0.343750 0.908750 1 00006bdb1eb5cd74 xclick Truck 1 0.276667 0.697500 2 00006bdb1eb5cd74 xclick Truck 1 0.702500 0.999167 3 00010bf498b64bab xclick Bus 1 0.156250 0.371250 4 00013f14dd4e168f xclick Bus 1 0.287500 0.999375 YMin YMax IsOccluded IsTruncated ... IsDepiction IsInside \ 0 0.156162 0.650047 1 0 ... 0 0 1 0.141604 0.437343 1 0 ... 0 0 2 0.204261 0.409774 1 1 ... 0 0 3 0.269188 0.705228 0 0 ... 0 0 4 0.194184 0.999062 0 1 ... 0 0 XClick1X XClick2X XClick3X XClick4X XClick1Y XClick2Y XClick3Y \ 0 0.421875 0.343750 0.795000 0.908750 0.156162 0.512700 0.650047 1 0.299167 0.276667 0.697500 0.659167 0.141604 0.241855 0.352130 2 0.849167 0.702500 0.906667 0.999167 0.204261 0.398496 0.409774 3 0.274375 0.371250 0.311875 0.156250 0.269188 0.493882 0.705228 4 0.920000 0.999375 0.648750 0.287500 0.194184 0.303940 0.999062 XClick4Y 0 0.457197 1 0.437343 2 0.295739 3 0.521691 4 0.523452 [5 rows x 21 columns]
class OpenImages(Dataset):
    """Dataset over the unique images referenced in a bounding-box dataframe.

    Each item yields the RGB image, its ground-truth boxes scaled to
    absolute pixel coordinates, the class names, and the image file path.
    """
    def __init__(self, df, image_folder=IMAGE_ROOT):
        self.root = image_folder
        self.df = df
        self.unique_images = df['ImageID'].unique()

    def __len__(self):
        return len(self.unique_images)

    def __getitem__(self, ix):
        image_id = self.unique_images[ix]
        image_path = f'{self.root}/{image_id}.jpg'
        # cv2 loads BGR; reversing the channel axis gives RGB.
        image = cv2.imread(image_path, 1)[..., ::-1]
        h, w, _ = image.shape
        rows = self.df[self.df['ImageID'] == image_id]
        # Scale normalised box columns up to pixel units.
        boxes = rows['XMin,YMin,XMax,YMax'.split(',')].values
        boxes = (boxes * np.array([w, h, w, h])).astype(np.uint16).tolist()
        classes = rows['LabelName'].values.tolist()
        return image, boxes, classes, image_path
# Visual sanity check: draw the ground-truth boxes on one sample image.
ds = OpenImages(df=DF_RAW)
im, bbs, clss, _ = ds[9]
show(im, bbs=bbs, texts=clss, sz=10)
def extract_candidates(img):
    """Propose region candidates for `img` via selective search.

    Returns a list of [x, y, w, h] rectangles, keeping only regions whose
    segment size is between 5% and 100% of the image area.
    """
    img_lbl, regions = selectivesearch.selective_search(img, scale=200, min_size=100)
    img_area = np.prod(img.shape[:2])
    candidates = []
    seen = set()  # track accepted rects as tuples for a working dedup test
    for r in regions:
        rect = r['rect']
        # BUG FIX: the original compared the tuple `r['rect']` against a
        # list of *lists*, so the duplicate check never matched anything.
        if rect in seen:
            continue
        if r['size'] < (0.05 * img_area):
            continue
        if r['size'] > (1 * img_area):
            continue
        seen.add(rect)
        candidates.append(list(rect))
    return candidates
def extract_iou(boxA, boxB, epsilon=1e-5):
    """Intersection-over-union of two (x1, y1, x2, y2) boxes.

    Returns 0.0 for disjoint boxes; `epsilon` guards against division by
    zero for degenerate (zero-area) boxes.
    """
    # Corners of the intersection rectangle.
    ix1, iy1 = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    ix2, iy2 = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
    inter_w = ix2 - ix1
    inter_h = iy2 - iy1
    if inter_w < 0 or inter_h < 0:
        return 0.0  # no overlap
    intersection = inter_w * inter_h
    union = ((boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
             + (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
             - intersection)
    return intersection / (union + epsilon)
# Build the region-proposal training set: for the first N images, collect
# selective-search candidates, their IoU with every ground-truth box,
# assigned class labels, and normalised box-offset regression targets.
FPATHS, GTBBS, CLSS, DELTAS, ROIS, IOUS = [], [], [], [], [], []
N = 500
for ix, (im, bbs, labels, fpath) in enumerate(ds):
    if(ix==N):
        break
    H, W, _ = im.shape
    candidates = extract_candidates(im)
    # Convert (x, y, w, h) proposals to (x1, y1, x2, y2) corner format.
    candidates = np.array([(x,y,x+w,y+h) for x,y,w,h in candidates])
    ious, rois, clss, deltas = [], [], [], []
    # ious[jx][k] = IoU of candidate jx with ground-truth box k.
    ious = np.array([[extract_iou(candidate, _bb_) for candidate in candidates] for _bb_ in bbs]).T
    for jx, candidate in enumerate(candidates):
        cx,cy,cX,cY = candidate
        candidate_ious = ious[jx]
        # Assign each candidate to its best-overlapping ground-truth box.
        best_iou_at = np.argmax(candidate_ious)
        best_iou = candidate_ious[best_iou_at]
        best_bb = _x,_y,_X,_Y = bbs[best_iou_at]
        # Positive sample when the best overlap exceeds 0.3, else background.
        if best_iou > 0.3: clss.append(labels[best_iou_at])
        else : clss.append('background')
        # Regression target: corner offsets from candidate to ground truth,
        # normalised by image width/height.
        delta = np.array([_x-cx, _y-cy, _X-cX, _Y-cY]) / np.array([W,H,W,H])
        deltas.append(delta)
        rois.append(candidate / np.array([W,H,W,H]))
    FPATHS.append(fpath)
    IOUS.append(ious)
    ROIS.append(rois)
    CLSS.append(clss)
    DELTAS.append(deltas)
    GTBBS.append(bbs)
# Normalise the collected file paths to '<IMAGE_ROOT>/<stem>.jpg'.
# (Removed the original follow-up line that rebuilt the five lists with an
# identity comprehension — it was a pure no-op.)
FPATHS = [f'{IMAGE_ROOT}/{stem(f)}.jpg' for f in FPATHS]
# Map class names to integer targets (and back). Note the integer assigned
# to 'background' depends on first-appearance order; downstream code reads
# it via `background_class` rather than assuming a fixed index.
targets = pd.DataFrame(flatten(CLSS), columns=['label'])
label2target = {l:t for t,l in enumerate(targets['label'].unique())}
target2label = {t:l for l,t in label2target.items()}
background_class = label2target['background']
# ImageNet channel statistics matching the pretrained VGG backbone.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
def preprocess_image(img):
    """Convert an HWC float image to a normalised CHW tensor on `device`."""
    tensor = torch.tensor(img).permute(2, 0, 1)  # HWC -> CHW
    return normalize(tensor).to(device).float()
def decode(_y):
    """Return the index of the maximal score along the last dimension."""
    preds = _y.max(-1).indices
    return preds
class RCNNDataset(Dataset):
    """Serves per-image region proposals for R-CNN training.

    Each item returns the full image, the cropped proposal regions, the
    proposal boxes in pixels, their class labels, the regression deltas,
    the ground-truth boxes and the file path.  `collate_fn` flattens a
    batch of images into one tensor of resized, normalised crops.
    """
    def __init__(self, fpaths, rois, labels, deltas, gtbbs):
        self.fpaths = fpaths
        self.gtbbs = gtbbs
        self.rois = rois          # proposals normalised to [0, 1]
        self.labels = labels
        self.deltas = deltas

    def __len__(self):
        return len(self.fpaths)

    def __getitem__(self, ix):
        fpath = str(self.fpaths[ix])
        image = cv2.imread(fpath, 1)[...,::-1]  # BGR -> RGB
        H, W, _ = image.shape
        sh = np.array([W,H,W,H])
        gtbbs = self.gtbbs[ix]
        rois = self.rois[ix]
        # Scale normalised proposals back to pixel coordinates.
        bbs = (np.array(rois)*sh).astype(np.uint16)
        labels = self.labels[ix]
        deltas = self.deltas[ix]
        crops = [image[y:Y,x:X] for (x,y,X,Y) in bbs]
        return image, crops, bbs, labels, deltas, gtbbs, fpath

    def collate_fn(self, batch):
        input, labels, deltas = [], [], []
        for ix in range(len(batch)):
            image, crops, image_bbs, image_labels, image_deltas, image_gt_bbs, image_fpath = batch[ix]
            crops = [cv2.resize(crop, (224,224)) for crop in crops]
            crops = [preprocess_image(crop/255.)[None] for crop in crops]
            input.extend(crops)
            labels.extend([label2target[c] for c in image_labels])
            deltas.extend(image_deltas)
        input = torch.cat(input).to(device)
        labels = torch.Tensor(labels).long().to(device)
        # BUG FIX: stack into a single ndarray first — constructing a tensor
        # from a list of ndarrays is extremely slow and triggers a PyTorch
        # UserWarning (seen in this notebook's output).
        deltas = torch.from_numpy(np.stack(deltas)).float().to(device)
        return input, labels, deltas
# 90/10 train/validation split at the image level.
n_train = 9*len(FPATHS)//10
train_ds = RCNNDataset(FPATHS[:n_train], ROIS[:n_train], CLSS[:n_train], DELTAS[:n_train], GTBBS[:n_train])
test_ds = RCNNDataset(FPATHS[n_train:], ROIS[n_train:], CLSS[n_train:], DELTAS[n_train:], GTBBS[n_train:])
from torch.utils.data import TensorDataset, DataLoader
train_loader = DataLoader(train_ds, batch_size=2, collate_fn=train_ds.collate_fn, drop_last=True)
test_loader = DataLoader(test_ds, batch_size=2, collate_fn=test_ds.collate_fn, drop_last=True)
# Frozen, headless VGG16 used purely as a 25088-dim feature extractor.
# FIX: `pretrained=True` is deprecated since torchvision 0.13 (the warning
# appears in this notebook's output); use the equivalent weights enum.
vgg_backbone = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
vgg_backbone.classifier = nn.Sequential()  # drop the FC head, keep features
for param in vgg_backbone.parameters():
    param.requires_grad = False
vgg_backbone.eval().to(device)
C:\Users\frakt\anaconda3\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( C:\Users\frakt\anaconda3\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg) Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\frakt/.cache\torch\hub\checkpoints\vgg16-397923af.pth 100%|███████████████████████████████████████████████████████████████████████████████| 528M/528M [01:04<00:00, 8.64MB/s]
VGG( (features): Sequential( (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (1): ReLU(inplace=True) (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (3): ReLU(inplace=True) (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (6): ReLU(inplace=True) (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (8): ReLU(inplace=True) (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (11): ReLU(inplace=True) (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (13): ReLU(inplace=True) (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (15): ReLU(inplace=True) (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (18): ReLU(inplace=True) (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (20): ReLU(inplace=True) (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (22): ReLU(inplace=True) (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (25): ReLU(inplace=True) (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (27): ReLU(inplace=True) (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (29): ReLU(inplace=True) (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) ) (avgpool): AdaptiveAvgPool2d(output_size=(7, 7)) (classifier): Sequential() )
class RCNN(nn.Module):
def __init__(self):
super().__init__()
feature_dim = 25088
self.backbone = vgg_backbone
self.cls_score = nn.Linear(feature_dim, len(label2target))
self.bbox = nn.Sequential(
nn.Linear(feature_dim, 512),
nn.ReLU(),
nn.Linear(512, 4),
nn.Tanh(),
)
self.cel = nn.CrossEntropyLoss()
self.sl1 = nn.L1Loss()
def forward(self, input):
feat = self.backbone(input)
cls_score = self.cls_score(feat)
bbox = self.bbox(feat)
return cls_score, bbox
def calc_loss(self, probs, _deltas, labels, deltas):
detection_loss = self.cel(probs, labels)
ixs, = torch.where(labels != 0)
_deltas = _deltas[ixs]
deltas = deltas[ixs]
self.lmb = 10.0
if len(ixs) > 0:
regression_loss = self.sl1(_deltas, deltas)
return detection_loss + self.lmb * regression_loss, detection_loss.detach(), regression_loss.detach()
else:
regression_loss = 0
return detection_loss + self.lmb * regression_loss, detection_loss.detach(), regression_loss
def train_batch(inputs, model, optimizer, criterion):
    """Run one optimisation step on a collated batch.

    Returns (total loss, detection loss, regression loss, per-proposal
    accuracy mask as a numpy array).
    """
    images, clss, deltas = inputs
    model.train()
    optimizer.zero_grad()
    pred_clss, pred_deltas = model(images)
    loss, loc_loss, regr_loss = criterion(pred_clss, pred_deltas, clss, deltas)
    accs = clss == decode(pred_clss)
    loss.backward()
    optimizer.step()
    return loss.detach(), loc_loss, regr_loss, accs.cpu().numpy()
@torch.no_grad()
def validate_batch(inputs, model, criterion):
    """Evaluate one collated batch without gradient tracking.

    Returns (predicted classes, predicted deltas, total loss, detection
    loss, regression loss, per-proposal accuracy mask as a numpy array).
    """
    input, clss, deltas = inputs
    model.eval()
    # FIX: the original wrapped the body in `with torch.no_grad()` even
    # though the decorator already disables gradients — redundant nesting
    # removed; behavior is unchanged.
    _clss, _deltas = model(input)
    loss, loc_loss, regr_loss = criterion(_clss, _deltas, clss, deltas)
    _, preds = _clss.max(-1)
    accs = clss == preds
    return preds, _deltas, loss.detach(), loc_loss, regr_loss, accs.cpu().numpy()
# Instantiate the model and train with plain SGD for n_epochs, logging
# batch-level metrics live via torch_snippets' Report.
rcnn = RCNN().to(device)
criterion = rcnn.calc_loss
optimizer = optim.SGD(rcnn.parameters(), lr=1e-3)
n_epochs = 5
log = Report(n_epochs)
for epoch in range(n_epochs):
    _n = len(train_loader)
    for ix, inputs in enumerate(train_loader):
        loss, loc_loss, regr_loss, accs = train_batch(inputs, rcnn,
                                                      optimizer, criterion)
        # Fractional epoch position drives the progress display.
        pos = (epoch + (ix+1)/_n)
        log.record(pos, trn_loss=loss.item(), trn_loc_loss=loc_loss,
                   trn_regr_loss=regr_loss,
                   trn_acc=accs.mean(), end='\r')
    # Validation pass at the end of each epoch.
    _n = len(test_loader)
    for ix,inputs in enumerate(test_loader):
        _clss, _deltas, loss, \
        loc_loss, regr_loss, accs = validate_batch(inputs,
                                                   rcnn, criterion)
        pos = (epoch + (ix+1)/_n)
        log.record(pos, val_loss=loss.item(), val_loc_loss=loc_loss,
                   val_regr_loss=regr_loss,
                   val_acc=accs.mean(), end='\r')
# Plotting training and validation metrics
log.plot_epochs('trn_loss,val_loss'.split(','))
C:\Users\frakt\AppData\Local\Temp\ipykernel_27780\4275183504.py:32: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\torch\csrc\utils\tensor_new.cpp:248.) deltas = torch.Tensor(deltas).float().to(device)
EPOCH: 0.409 trn_loss: 2.199 trn_loc_loss: 0.554 trn_regr_loss: 0.164 trn_acc: 0.792 (821.90s - 9228.49s remaining))
[1;31m---------------------------------------------------------------------------[0m [1;31mKeyboardInterrupt[0m Traceback (most recent call last) [1;32m~\AppData\Local\Temp\ipykernel_27780\1533349013.py[0m in [0;36m<module>[1;34m[0m [0;32m 3[0m [0m_n[0m [1;33m=[0m [0mlen[0m[1;33m([0m[0mtrain_loader[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0;32m 4[0m [1;32mfor[0m [0mix[0m[1;33m,[0m [0minputs[0m [1;32min[0m [0menumerate[0m[1;33m([0m[0mtrain_loader[0m[1;33m)[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m [1;32m----> 5[1;33m loss, loc_loss, regr_loss, accs = train_batch(inputs, rcnn, [0m[0;32m 6[0m optimizer, criterion) [0;32m 7[0m [0mpos[0m [1;33m=[0m [1;33m([0m[0mepoch[0m [1;33m+[0m [1;33m([0m[0mix[0m[1;33m+[0m[1;36m1[0m[1;33m)[0m[1;33m/[0m[0m_n[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [1;32m~\AppData\Local\Temp\ipykernel_27780\1507503000.py[0m in [0;36mtrain_batch[1;34m(inputs, model, optimizer, criterion)[0m [0;32m 3[0m [0mmodel[0m[1;33m.[0m[0mtrain[0m[1;33m([0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0;32m 4[0m [0moptimizer[0m[1;33m.[0m[0mzero_grad[0m[1;33m([0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [1;32m----> 5[1;33m [0m_clss[0m[1;33m,[0m [0m_deltas[0m [1;33m=[0m [0mmodel[0m[1;33m([0m[0minput[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 6[0m [0mloss[0m[1;33m,[0m [0mloc_loss[0m[1;33m,[0m [0mregr_loss[0m [1;33m=[0m [0mcriterion[0m[1;33m([0m[0m_clss[0m[1;33m,[0m [0m_deltas[0m[1;33m,[0m [0mclss[0m[1;33m,[0m [0mdeltas[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0;32m 7[0m [0maccs[0m [1;33m=[0m [0mclss[0m [1;33m==[0m [0mdecode[0m[1;33m([0m[0m_clss[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [1;32m~\anaconda3\lib\site-packages\torch\nn\modules\module.py[0m in [0;36m_call_impl[1;34m(self, *args, **kwargs)[0m [0;32m 1499[0m [1;32mor[0m [0m_global_backward_pre_hooks[0m [1;32mor[0m [0m_global_backward_hooks[0m[1;33m[0m[1;33m[0m[0m [0;32m 1500[0m or _global_forward_hooks or _global_forward_pre_hooks): [1;32m-> 1501[1;33m [1;32mreturn[0m [0mforward_call[0m[1;33m([0m[1;33m*[0m[0margs[0m[1;33m,[0m 
[1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 1502[0m [1;31m# Do not call functions when jit is used[0m[1;33m[0m[1;33m[0m[0m [0;32m 1503[0m [0mfull_backward_hooks[0m[1;33m,[0m [0mnon_full_backward_hooks[0m [1;33m=[0m [1;33m[[0m[1;33m][0m[1;33m,[0m [1;33m[[0m[1;33m][0m[1;33m[0m[1;33m[0m[0m [1;32m~\AppData\Local\Temp\ipykernel_27780\2122026028.py[0m in [0;36mforward[1;34m(self, input)[0m [0;32m 14[0m [0mself[0m[1;33m.[0m[0msl1[0m [1;33m=[0m [0mnn[0m[1;33m.[0m[0mL1Loss[0m[1;33m([0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0;32m 15[0m [1;32mdef[0m [0mforward[0m[1;33m([0m[0mself[0m[1;33m,[0m [0minput[0m[1;33m)[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m [1;32m---> 16[1;33m [0mfeat[0m [1;33m=[0m [0mself[0m[1;33m.[0m[0mbackbone[0m[1;33m([0m[0minput[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 17[0m [0mcls_score[0m [1;33m=[0m [0mself[0m[1;33m.[0m[0mcls_score[0m[1;33m([0m[0mfeat[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0;32m 18[0m [0mbbox[0m [1;33m=[0m [0mself[0m[1;33m.[0m[0mbbox[0m[1;33m([0m[0mfeat[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [1;32m~\anaconda3\lib\site-packages\torch\nn\modules\module.py[0m in [0;36m_call_impl[1;34m(self, *args, **kwargs)[0m [0;32m 1499[0m [1;32mor[0m [0m_global_backward_pre_hooks[0m [1;32mor[0m [0m_global_backward_hooks[0m[1;33m[0m[1;33m[0m[0m [0;32m 1500[0m or _global_forward_hooks or _global_forward_pre_hooks): [1;32m-> 1501[1;33m [1;32mreturn[0m [0mforward_call[0m[1;33m([0m[1;33m*[0m[0margs[0m[1;33m,[0m [1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 1502[0m [1;31m# Do not call functions when jit is used[0m[1;33m[0m[1;33m[0m[0m [0;32m 1503[0m [0mfull_backward_hooks[0m[1;33m,[0m [0mnon_full_backward_hooks[0m [1;33m=[0m [1;33m[[0m[1;33m][0m[1;33m,[0m [1;33m[[0m[1;33m][0m[1;33m[0m[1;33m[0m[0m [1;32m~\anaconda3\lib\site-packages\torchvision\models\vgg.py[0m in [0;36mforward[1;34m(self, x)[0m [0;32m 64[0m [1;33m[0m[0m [0;32m 65[0m [1;32mdef[0m [0mforward[0m[1;33m([0m[0mself[0m[1;33m,[0m [0mx[0m[1;33m:[0m 
[0mtorch[0m[1;33m.[0m[0mTensor[0m[1;33m)[0m [1;33m->[0m [0mtorch[0m[1;33m.[0m[0mTensor[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m [1;32m---> 66[1;33m [0mx[0m [1;33m=[0m [0mself[0m[1;33m.[0m[0mfeatures[0m[1;33m([0m[0mx[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 67[0m [0mx[0m [1;33m=[0m [0mself[0m[1;33m.[0m[0mavgpool[0m[1;33m([0m[0mx[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0;32m 68[0m [0mx[0m [1;33m=[0m [0mtorch[0m[1;33m.[0m[0mflatten[0m[1;33m([0m[0mx[0m[1;33m,[0m [1;36m1[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [1;32m~\anaconda3\lib\site-packages\torch\nn\modules\module.py[0m in [0;36m_call_impl[1;34m(self, *args, **kwargs)[0m [0;32m 1499[0m [1;32mor[0m [0m_global_backward_pre_hooks[0m [1;32mor[0m [0m_global_backward_hooks[0m[1;33m[0m[1;33m[0m[0m [0;32m 1500[0m or _global_forward_hooks or _global_forward_pre_hooks): [1;32m-> 1501[1;33m [1;32mreturn[0m [0mforward_call[0m[1;33m([0m[1;33m*[0m[0margs[0m[1;33m,[0m [1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 1502[0m [1;31m# Do not call functions when jit is used[0m[1;33m[0m[1;33m[0m[0m [0;32m 1503[0m [0mfull_backward_hooks[0m[1;33m,[0m [0mnon_full_backward_hooks[0m [1;33m=[0m [1;33m[[0m[1;33m][0m[1;33m,[0m [1;33m[[0m[1;33m][0m[1;33m[0m[1;33m[0m[0m [1;32m~\anaconda3\lib\site-packages\torch\nn\modules\container.py[0m in [0;36mforward[1;34m(self, input)[0m [0;32m 215[0m [1;32mdef[0m [0mforward[0m[1;33m([0m[0mself[0m[1;33m,[0m [0minput[0m[1;33m)[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m [0;32m 216[0m [1;32mfor[0m [0mmodule[0m [1;32min[0m [0mself[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m [1;32m--> 217[1;33m [0minput[0m [1;33m=[0m [0mmodule[0m[1;33m([0m[0minput[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 218[0m [1;32mreturn[0m [0minput[0m[1;33m[0m[1;33m[0m[0m [0;32m 219[0m [1;33m[0m[0m [1;32m~\anaconda3\lib\site-packages\torch\nn\modules\module.py[0m in [0;36m_call_impl[1;34m(self, *args, **kwargs)[0m [0;32m 1499[0m [1;32mor[0m [0m_global_backward_pre_hooks[0m [1;32mor[0m [0m_global_backward_hooks[0m[1;33m[0m[1;33m[0m[0m [0;32m 1500[0m or 
_global_forward_hooks or _global_forward_pre_hooks): [1;32m-> 1501[1;33m [1;32mreturn[0m [0mforward_call[0m[1;33m([0m[1;33m*[0m[0margs[0m[1;33m,[0m [1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 1502[0m [1;31m# Do not call functions when jit is used[0m[1;33m[0m[1;33m[0m[0m [0;32m 1503[0m [0mfull_backward_hooks[0m[1;33m,[0m [0mnon_full_backward_hooks[0m [1;33m=[0m [1;33m[[0m[1;33m][0m[1;33m,[0m [1;33m[[0m[1;33m][0m[1;33m[0m[1;33m[0m[0m [1;32m~\anaconda3\lib\site-packages\torch\nn\modules\conv.py[0m in [0;36mforward[1;34m(self, input)[0m [0;32m 461[0m [1;33m[0m[0m [0;32m 462[0m [1;32mdef[0m [0mforward[0m[1;33m([0m[0mself[0m[1;33m,[0m [0minput[0m[1;33m:[0m [0mTensor[0m[1;33m)[0m [1;33m->[0m [0mTensor[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m [1;32m--> 463[1;33m [1;32mreturn[0m [0mself[0m[1;33m.[0m[0m_conv_forward[0m[1;33m([0m[0minput[0m[1;33m,[0m [0mself[0m[1;33m.[0m[0mweight[0m[1;33m,[0m [0mself[0m[1;33m.[0m[0mbias[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m [0m[0;32m 464[0m [1;33m[0m[0m [0;32m 465[0m [1;32mclass[0m [0mConv3d[0m[1;33m([0m[0m_ConvNd[0m[1;33m)[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m [1;32m~\anaconda3\lib\site-packages\torch\nn\modules\conv.py[0m in [0;36m_conv_forward[1;34m(self, input, weight, bias)[0m [0;32m 457[0m [0mweight[0m[1;33m,[0m [0mbias[0m[1;33m,[0m [0mself[0m[1;33m.[0m[0mstride[0m[1;33m,[0m[1;33m[0m[1;33m[0m[0m [0;32m 458[0m _pair(0), self.dilation, self.groups) [1;32m--> 459[1;33m return F.conv2d(input, weight, bias, self.stride, [0m[0;32m 460[0m self.padding, self.dilation, self.groups) [0;32m 461[0m [1;33m[0m[0m [1;31mKeyboardInterrupt[0m:
def test_predictions(filename, show_output=True):
    """Run the trained R-CNN on one image file and plot the best detection.

    Returns (best box, best class name, best confidence).  When nothing
    survives filtering and `show_output` is False, returns a whole-image
    'background' placeholder instead.
    """
    img = np.array(cv2.imread(filename, 1)[...,::-1])  # BGR -> RGB
    candidates = extract_candidates(img)
    # Convert (x, y, w, h) proposals to corner format.
    candidates = [(x,y,x+w,y+h) for x,y,w,h in candidates]
    input = []
    for candidate in candidates:
        x,y,X,Y = candidate
        crop = cv2.resize(img[y:Y,x:X], (224,224))
        input.append(preprocess_image(crop/255.)[None])
    input = torch.cat(input).to(device)
    with torch.no_grad():
        rcnn.eval()
        probs, deltas = rcnn(input)
        probs = torch.nn.functional.softmax(probs, -1)
        confs, clss = torch.max(probs, -1)
    candidates = np.array(candidates)
    confs, clss, probs, deltas = [tensor.detach().cpu().numpy() for tensor in [confs, clss, probs, deltas]]
    # Keep only proposals not classified as background.
    ixs = clss!=background_class
    confs, clss, probs, deltas, candidates = [tensor[ixs] for tensor in [confs, clss, probs, deltas, candidates]]
    # NOTE(review): the deltas were trained normalised by image W/H but are
    # added here to pixel-space candidates without rescaling, so the
    # correction is effectively negligible — confirm this is intended.
    bbs = (candidates + deltas).astype(np.uint16)
    # Non-maximum suppression with a strict 0.05 IoU threshold.
    ixs = nms(torch.tensor(bbs.astype(np.float32)), torch.tensor(confs), 0.05)
    confs, clss, probs, deltas, candidates, bbs = [tensor[ixs] for tensor in [confs, clss, probs, deltas, candidates, bbs]]
    if len(ixs) == 1:
        # Restore the leading axis that scalar indexing removed.
        confs, clss, probs, deltas, candidates, bbs = [tensor[None] for tensor in [confs, clss, probs, deltas, candidates, bbs]]
    if len(confs) == 0 and not show_output:
        return (0,0,224,224), 'background', 0
    if len(confs) > 0:
        best_pred = np.argmax(confs)
        best_conf = np.max(confs)
        best_bb = bbs[best_pred]
        x,y,X,Y = best_bb
    # Side-by-side plot: original image vs. predicted boxes.
    _, ax = plt.subplots(1, 2, figsize=(20,10))
    show(img, ax=ax[0])
    ax[0].grid(False)
    ax[0].set_title('Original image')
    if len(confs) == 0:
        ax[1].imshow(img)
        ax[1].set_title('No objects')
        plt.show()
        return
    ax[1].set_title(target2label[clss[best_pred]])
    show(img, bbs=bbs.tolist(), texts=[target2label[c] for c in clss.tolist()], ax=ax[1], title='predicted bounding box and class')
    plt.show()
    return (x,y,X,Y),target2label[clss[best_pred]],best_conf
# Sanity-check inference on one held-out validation sample.
image, crops, bbs, labels, deltas, gtbbs, fpath = test_ds[7]
test_predictions(fpath)
((16, 60, 218, 133), 'Bus', 0.9851093)