Computer_Vision/Chapter09/Instance_Segmentation.ipynb


!wget --quiet http://sceneparsing.csail.mit.edu/data/ChallengeData2017/images.tar
!wget --quiet http://sceneparsing.csail.mit.edu/data/ChallengeData2017/annotations_instance.tar
!tar -xf images.tar
!tar -xf annotations_instance.tar
!rm images.tar annotations_instance.tar
!pip install -qU torch_snippets
!wget --quiet https://raw.githubusercontent.com/pytorch/vision/release/0.12/references/detection/engine.py
!wget --quiet https://raw.githubusercontent.com/pytorch/vision/release/0.12/references/detection/utils.py
!wget --quiet https://raw.githubusercontent.com/pytorch/vision/release/0.12/references/detection/transforms.py
!wget --quiet https://raw.githubusercontent.com/pytorch/vision/release/0.12/references/detection/coco_eval.py
!wget --quiet https://raw.githubusercontent.com/pytorch/vision/release/0.12/references/detection/coco_utils.py
!pip install -q -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
from torch_snippets import *
from torch_snippets.inspector import inspect
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

from engine import train_one_epoch, evaluate
import utils
import transforms as T
device = 'cuda' if torch.cuda.is_available() else 'cpu'
all_images = Glob('images/training')
all_annots = Glob('annotations_instance/training')
f = 'ADE_train_00014301'

im = read(find(f, all_images), 1)
# instance annotations are RGB images: the R channel encodes the class id
# and the G channel encodes the instance id
an = read(find(f, all_annots), 1).transpose(2,0,1)
r,g,b = an
nzs = np.nonzero(r==4) # class id 4 stands for person
instances = np.unique(g[nzs])
masks = np.zeros((len(instances), *r.shape))
for ix,_id in enumerate(instances):
    masks[ix] = (g==_id) & (r==4) # restrict each instance mask to person pixels

subplots([im, *masks], sz=20)
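# quick sanity check (illustrative, not from the original notebook): list the
# class ids present in this annotation and the person instances recovered above
print(f'class ids present: {np.unique(r)}')
print(f'person instances: {len(instances)}')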
annots = []
# keep only annotations that contain at least one person (class id 4)
for ann in Tqdm(all_annots[:5000]):
    _ann = read(ann, 1).transpose(2,0,1)
    r,g,b = _ann
    if 4 not in np.unique(r): continue
    annots.append(ann)
100%|██████████| 5000/5000 [00:27<00:00, 179.49it/s]
from sklearn.model_selection import train_test_split
_annots = stems(annots)
trn_items, val_items = train_test_split(_annots, random_state=2)
def get_transform(train):
    image_transforms = []
    image_transforms.append(T.PILToTensor())
    if train:
        image_transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(image_transforms)
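# quick check (a sketch, assuming the reference transforms.py from the
# torchvision 0.12 detection references): these transforms act on
# (image, target) pairs, and T.PILToTensor keeps pixels as uint8 -- which is
# why the dataset below rescales images to [0, 1]
sample_img, _ = get_transform(train=False)(Image.new('RGB', (64, 64)), {})
print(sample_img.dtype) # expected: torch.uint8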
class MasksDataset(Dataset):
    def __init__(self, items, transforms, N):
        self.items = items
        self.transforms = transforms
        self.N = N # caps the dataset length (see __len__) for quick experiments
    def get_mask(self, path):
        an = read(path, 1).transpose(2,0,1)
        r,g,b = an
        nzs = np.nonzero(r==4) # person pixels
        instances = np.unique(g[nzs])
        masks = np.zeros((len(instances), *r.shape))
        for ix,_id in enumerate(instances):
            masks[ix] = (g==_id) & (r==4) # restrict each instance to person pixels
        return masks
    def __getitem__(self, ix):
        _id = self.items[ix]
        img_path = f'images/training/{_id}.jpg'
        mask_path = f'annotations_instance/training/{_id}.png'
        masks = self.get_mask(mask_path)
        obj_ids = np.arange(1, len(masks)+1)
        img = Image.open(img_path).convert("RGB")
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            obj_pixels = np.where(masks[i])
            xmin = np.min(obj_pixels[1])
            xmax = np.max(obj_pixels[1])
            ymin = np.min(obj_pixels[0])
            ymax = np.max(obj_pixels[0])
            # enforce a minimum 10-pixel extent to avoid degenerate boxes
            if ((xmax-xmin) <= 10) or ((ymax-ymin) <= 10):
                xmax = xmin+10
                ymax = ymin+10
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([ix])
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        # PILToTensor returns uint8 pixels in [0, 255]; rescale to [0, 1]
        if (img.dtype == torch.float32) or (img.dtype == torch.uint8):
            img = img/255.
        return img, target
    def __len__(self):
        return self.N
    def choose(self):
        return self[randint(len(self))]
x = MasksDataset(trn_items, get_transform(train=True), N=100)
im,targ = x[0]
inspect(im,targ)
subplots([im, *targ['masks']], sz=10)
 Tensor	Shape: torch.Size([3, 512, 684])	Min: 0.000	Max: 1.000	Mean: 0.486	dtype: torch.float32
 Dict Of 6 items
	BOXES:
	 Tensor	Shape: torch.Size([3, 4])	Min: 42.000	Max: 477.000	Mean: 259.417	dtype: torch.float32
	LABELS:
	 Tensor	Shape: torch.Size([3])	Min: 1.000	Max: 1.000	Mean: 1.000	dtype: torch.int64
	MASKS:
	 Tensor	Shape: torch.Size([3, 512, 684])	Min: 0.000	Max: 1.000	Mean: 0.008	dtype: torch.uint8
	IMAGE_ID:
	 Tensor	Shape: torch.Size([1])	Min: 0.000	Max: 0.000	Mean: 0.000	dtype: torch.int64
	AREA:
	 Tensor	Shape: torch.Size([3])	Min: 1932.000	Max: 10688.000	Mean: 5270.667	dtype: torch.float32
... ... 1 more item(s)
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,num_classes)
    return model
model = get_model_instance_segmentation(2).to(device)
model
MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(256, eps=0.0)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(inplace=True)
        )
      )
      (layer2): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128, eps=0.0)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128, eps=0.0)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512, eps=0.0)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(512, eps=0.0)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128, eps=0.0)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128, eps=0.0)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512, eps=0.0)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128, eps=0.0)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128, eps=0.0)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512, eps=0.0)
          (relu): ReLU(inplace=True)
        )
        (3): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128, eps=0.0)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128, eps=0.0)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512, eps=0.0)
          (relu): ReLU(inplace=True)
        )
      )
      (layer3): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256, eps=0.0)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256, eps=0.0)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024, eps=0.0)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(1024, eps=0.0)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256, eps=0.0)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256, eps=0.0)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024, eps=0.0)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256, eps=0.0)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256, eps=0.0)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024, eps=0.0)
          (relu): ReLU(inplace=True)
        )
        (3): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256, eps=0.0)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256, eps=0.0)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024, eps=0.0)
          (relu): ReLU(inplace=True)
        )
        (4): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256, eps=0.0)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256, eps=0.0)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024, eps=0.0)
          (relu): ReLU(inplace=True)
        )
        (5): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256, eps=0.0)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256, eps=0.0)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024, eps=0.0)
          (relu): ReLU(inplace=True)
        )
      )
      (layer4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512, eps=0.0)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512, eps=0.0)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048, eps=0.0)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(2048, eps=0.0)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512, eps=0.0)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512, eps=0.0)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048, eps=0.0)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512, eps=0.0)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512, eps=0.0)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048, eps=0.0)
          (relu): ReLU(inplace=True)
        )
      )
    )
    (fpn): FeaturePyramidNetwork(
      (inner_blocks): ModuleList(
        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
        (1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
        (2): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
        (3): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
      )
      (layer_blocks): ModuleList(
        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (extra_blocks): LastLevelMaxPool()
    )
  )
  (rpn): RegionProposalNetwork(
    (anchor_generator): AnchorGenerator()
    (head): RPNHead(
      (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (cls_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
      (bbox_pred): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
    )
  )
  (roi_heads): RoIHeads(
    (box_roi_pool): MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'], output_size=(7, 7), sampling_ratio=2)
    (box_head): TwoMLPHead(
      (fc6): Linear(in_features=12544, out_features=1024, bias=True)
      (fc7): Linear(in_features=1024, out_features=1024, bias=True)
    )
    (box_predictor): FastRCNNPredictor(
      (cls_score): Linear(in_features=1024, out_features=2, bias=True)
      (bbox_pred): Linear(in_features=1024, out_features=8, bias=True)
    )
    (mask_roi_pool): MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'], output_size=(14, 14), sampling_ratio=2)
    (mask_head): MaskRCNNHeads(
      (mask_fcn1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu1): ReLU(inplace=True)
      (mask_fcn2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu2): ReLU(inplace=True)
      (mask_fcn3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu3): ReLU(inplace=True)
      (mask_fcn4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu4): ReLU(inplace=True)
    )
    (mask_predictor): MaskRCNNPredictor(
      (conv5_mask): ConvTranspose2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
      (relu): ReLU(inplace=True)
      (mask_fcn_logits): Conv2d(256, 2, kernel_size=(1, 1), stride=(1, 1))
    )
  )
)
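# quick check (illustrative): confirm the replaced heads now predict
# num_classes=2 outputs (background + person) for both boxes and masks,
# matching the FastRCNNPredictor and MaskRCNNPredictor shown in the repr above
print(model.roi_heads.box_predictor.cls_score.out_features)         # expected: 2
print(model.roi_heads.mask_predictor.mask_fcn_logits.out_channels)  # expected: 2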
dataset = MasksDataset(trn_items, get_transform(train=True), N=len(trn_items))
dataset_test = MasksDataset(val_items, get_transform(train=False), N=len(val_items))

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=0,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)
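# illustrative: utils.collate_fn from the detection references zips the batch
# into (tuple_of_images, tuple_of_targets) -- the input format the detection
# models expect, since images in a batch may have different sizes
ims, targets = next(iter(data_loader))
print(len(ims), len(targets)) # 2 2 for batch_size=2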
num_classes = 2
model = get_model_instance_segmentation(num_classes).to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)
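# for reference, a minimal sketch of the optimization step that
# engine.train_one_epoch performs per batch (minus its logging and lr warmup)
def train_step(ims, targets):
    images = [im.to(device) for im in ims]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    loss_dict = model(images, targets) # in train mode the model returns a dict of losses
    loss = sum(loss_dict.values())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss_dict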
num_epochs = 1

trn_history = []
for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    res = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    trn_history.append(res)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset 
    res = evaluate(model, data_loader_test, device=device)
Epoch: [0]  [  0/482]  eta: 0:19:00  lr: 0.000015  loss: 5.5752 (5.5752)  loss_classifier: 0.8792 (0.8792)  loss_box_reg: 0.8384 (0.8384)  loss_mask: 3.7591 (3.7591)  loss_objectness: 0.0579 (0.0579)  loss_rpn_box_reg: 0.0406 (0.0406)  time: 2.3664  data: 0.2396  max mem: 5656
Epoch: [0]  [ 10/482]  eta: 0:17:21  lr: 0.000119  loss: 4.4057 (4.4962)  loss_classifier: 0.8939 (0.8836)  loss_box_reg: 0.3374 (0.3255)  loss_mask: 3.1220 (3.2297)  loss_objectness: 0.0395 (0.0374)  loss_rpn_box_reg: 0.0189 (0.0200)  time: 2.2061  data: 0.0599  max mem: 7010
Epoch: [0]  [ 20/482]  eta: 0:16:13  lr: 0.000223  loss: 2.3401 (3.1059)  loss_classifier: 0.5854 (0.6268)  loss_box_reg: 0.2879 (0.3154)  loss_mask: 1.5016 (2.1052)  loss_objectness: 0.0275 (0.0391)  loss_rpn_box_reg: 0.0097 (0.0193)  time: 2.0950  data: 0.0462  max mem: 7010
Epoch: [0]  [ 30/482]  eta: 0:15:52  lr: 0.000327  loss: 1.3235 (2.5105)  loss_classifier: 0.2697 (0.5083)  loss_box_reg: 0.3295 (0.3287)  loss_mask: 0.6481 (1.6156)  loss_objectness: 0.0256 (0.0382)  loss_rpn_box_reg: 0.0169 (0.0197)  time: 2.0544  data: 0.0498  max mem: 7010
Epoch: [0]  [ 40/482]  eta: 0:15:16  lr: 0.000431  loss: 1.1525 (2.1411)  loss_classifier: 0.2526 (0.4368)  loss_box_reg: 0.3490 (0.3244)  loss_mask: 0.4338 (1.3189)  loss_objectness: 0.0398 (0.0401)  loss_rpn_box_reg: 0.0169 (0.0209)  time: 2.0381  data: 0.0498  max mem: 7010
Epoch: [0]  [ 50/482]  eta: 0:14:54  lr: 0.000535  loss: 0.9247 (1.9162)  loss_classifier: 0.1698 (0.3886)  loss_box_reg: 0.2237 (0.3282)  loss_mask: 0.3867 (1.1401)  loss_objectness: 0.0281 (0.0392)  loss_rpn_box_reg: 0.0114 (0.0202)  time: 2.0131  data: 0.0610  max mem: 7010
Epoch: [0]  [ 60/482]  eta: 0:14:28  lr: 0.000638  loss: 0.8918 (1.7327)  loss_classifier: 0.1380 (0.3478)  loss_box_reg: 0.2182 (0.3140)  loss_mask: 0.3384 (1.0056)  loss_objectness: 0.0281 (0.0426)  loss_rpn_box_reg: 0.0114 (0.0226)  time: 2.0290  data: 0.0806  max mem: 7010
Epoch: [0]  [ 70/482]  eta: 0:14:18  lr: 0.000742  loss: 0.6852 (1.5877)  loss_classifier: 0.1196 (0.3171)  loss_box_reg: 0.2182 (0.3067)  loss_mask: 0.2703 (0.9032)  loss_objectness: 0.0281 (0.0394)  loss_rpn_box_reg: 0.0066 (0.0212)  time: 2.1122  data: 0.0643  max mem: 7010
Epoch: [0]  [ 80/482]  eta: 0:13:59  lr: 0.000846  loss: 0.7877 (1.5065)  loss_classifier: 0.1280 (0.2988)  loss_box_reg: 0.3146 (0.3121)  loss_mask: 0.2894 (0.8359)  loss_objectness: 0.0127 (0.0377)  loss_rpn_box_reg: 0.0114 (0.0220)  time: 2.1728  data: 0.0598  max mem: 7397
Epoch: [0]  [ 90/482]  eta: 0:13:40  lr: 0.000950  loss: 0.9356 (1.4376)  loss_classifier: 0.1458 (0.2816)  loss_box_reg: 0.3707 (0.3133)  loss_mask: 0.3574 (0.7842)  loss_objectness: 0.0190 (0.0365)  loss_rpn_box_reg: 0.0180 (0.0220)  time: 2.1258  data: 0.0642  max mem: 7397
Epoch: [0]  [100/482]  eta: 0:13:17  lr: 0.001054  loss: 0.9208 (1.3765)  loss_classifier: 0.1397 (0.2671)  loss_box_reg: 0.3707 (0.3114)  loss_mask: 0.3647 (0.7410)  loss_objectness: 0.0144 (0.0351)  loss_rpn_box_reg: 0.0127 (0.0219)  time: 2.0945  data: 0.0482  max mem: 7397
Epoch: [0]  [110/482]  eta: 0:12:57  lr: 0.001158  loss: 0.7449 (1.3182)  loss_classifier: 0.1145 (0.2534)  loss_box_reg: 0.3175 (0.3092)  loss_mask: 0.3104 (0.7013)  loss_objectness: 0.0113 (0.0331)  loss_rpn_box_reg: 0.0096 (0.0213)  time: 2.0841  data: 0.0597  max mem: 7397
Epoch: [0]  [120/482]  eta: 0:12:37  lr: 0.001262  loss: 0.8370 (1.2859)  loss_classifier: 0.1253 (0.2448)  loss_box_reg: 0.3549 (0.3110)  loss_mask: 0.3249 (0.6721)  loss_objectness: 0.0138 (0.0340)  loss_rpn_box_reg: 0.0193 (0.0240)  time: 2.1204  data: 0.0764  max mem: 7397
Epoch: [0]  [130/482]  eta: 0:12:17  lr: 0.001365  loss: 0.7976 (1.2434)  loss_classifier: 0.1197 (0.2353)  loss_box_reg: 0.2635 (0.3046)  loss_mask: 0.3405 (0.6463)  loss_objectness: 0.0226 (0.0334)  loss_rpn_box_reg: 0.0215 (0.0238)  time: 2.1225  data: 0.0722  max mem: 7397
Epoch: [0]  [140/482]  eta: 0:11:59  lr: 0.001469  loss: 0.7081 (1.2095)  loss_classifier: 0.1068 (0.2274)  loss_box_reg: 0.2080 (0.3000)  loss_mask: 0.3405 (0.6248)  loss_objectness: 0.0217 (0.0327)  loss_rpn_box_reg: 0.0171 (0.0246)  time: 2.1536  data: 0.0611  max mem: 7397
Epoch: [0]  [150/482]  eta: 0:11:37  lr: 0.001573  loss: 0.7522 (1.1775)  loss_classifier: 0.0847 (0.2193)  loss_box_reg: 0.1924 (0.2910)  loss_mask: 0.3606 (0.6111)  loss_objectness: 0.0174 (0.0320)  loss_rpn_box_reg: 0.0199 (0.0241)  time: 2.1382  data: 0.0556  max mem: 7397
Epoch: [0]  [160/482]  eta: 0:11:14  lr: 0.001677  loss: 0.7844 (1.1583)  loss_classifier: 0.1275 (0.2149)  loss_box_reg: 0.1283 (0.2864)  loss_mask: 0.3421 (0.5944)  loss_objectness: 0.0413 (0.0379)  loss_rpn_box_reg: 0.0124 (0.0246)  time: 2.0422  data: 0.0565  max mem: 7397
Epoch: [0]  [170/482]  eta: 0:10:53  lr: 0.001781  loss: 0.7441 (1.1383)  loss_classifier: 0.1275 (0.2104)  loss_box_reg: 0.1731 (0.2818)  loss_mask: 0.3421 (0.5821)  loss_objectness: 0.0512 (0.0393)  loss_rpn_box_reg: 0.0140 (0.0247)  time: 2.0471  data: 0.0580  max mem: 7397
Epoch: [0]  [180/482]  eta: 0:10:30  lr: 0.001885  loss: 0.7389 (1.1167)  loss_classifier: 0.1088 (0.2058)  loss_box_reg: 0.1782 (0.2773)  loss_mask: 0.3572 (0.5695)  loss_objectness: 0.0371 (0.0394)  loss_rpn_box_reg: 0.0169 (0.0247)  time: 2.0214  data: 0.0548  max mem: 7397
Epoch: [0]  [190/482]  eta: 0:10:09  lr: 0.001988  loss: 0.7369 (1.0932)  loss_classifier: 0.1137 (0.2013)  loss_box_reg: 0.1782 (0.2730)  loss_mask: 0.3162 (0.5559)  loss_objectness: 0.0230 (0.0385)  loss_rpn_box_reg: 0.0142 (0.0244)  time: 2.0172  data: 0.0557  max mem: 7472
Epoch: [0]  [200/482]  eta: 0:09:49  lr: 0.002092  loss: 0.6406 (1.0779)  loss_classifier: 0.1390 (0.1998)  loss_box_reg: 0.1775 (0.2714)  loss_mask: 0.3061 (0.5442)  loss_objectness: 0.0176 (0.0383)  loss_rpn_box_reg: 0.0076 (0.0242)  time: 2.1139  data: 0.0526  max mem: 7472
Epoch: [0]  [210/482]  eta: 0:09:27  lr: 0.002196  loss: 0.6591 (1.0593)  loss_classifier: 0.1058 (0.1949)  loss_box_reg: 0.1948 (0.2685)  loss_mask: 0.3317 (0.5347)  loss_objectness: 0.0178 (0.0372)  loss_rpn_box_reg: 0.0128 (0.0239)  time: 2.0869  data: 0.0533  max mem: 7472
Epoch: [0]  [220/482]  eta: 0:09:05  lr: 0.002300  loss: 0.5995 (1.0413)  loss_classifier: 0.0771 (0.1903)  loss_box_reg: 0.1384 (0.2628)  loss_mask: 0.3158 (0.5275)  loss_objectness: 0.0091 (0.0368)  loss_rpn_box_reg: 0.0132 (0.0239)  time: 2.0243  data: 0.0517  max mem: 7472
Epoch: [0]  [230/482]  eta: 0:08:45  lr: 0.002404  loss: 0.5488 (1.0206)  loss_classifier: 0.0771 (0.1861)  loss_box_reg: 0.1384 (0.2585)  loss_mask: 0.3013 (0.5169)  loss_objectness: 0.0074 (0.0357)  loss_rpn_box_reg: 0.0060 (0.0234)  time: 2.0572  data: 0.0482  max mem: 7472
Epoch: [0]  [240/482]  eta: 0:08:24  lr: 0.002508  loss: 0.5726 (1.0085)  loss_classifier: 0.0902 (0.1839)  loss_box_reg: 0.1435 (0.2558)  loss_mask: 0.3014 (0.5098)  loss_objectness: 0.0091 (0.0353)  loss_rpn_box_reg: 0.0133 (0.0237)  time: 2.1009  data: 0.0742  max mem: 7472
Epoch: [0]  [250/482]  eta: 0:08:03  lr: 0.002612  loss: 0.5975 (0.9965)  loss_classifier: 0.0902 (0.1816)  loss_box_reg: 0.1355 (0.2537)  loss_mask: 0.3355 (0.5032)  loss_objectness: 0.0154 (0.0347)  loss_rpn_box_reg: 0.0170 (0.0233)  time: 2.0566  data: 0.0683  max mem: 7472
Epoch: [0]  [260/482]  eta: 0:07:43  lr: 0.002715  loss: 0.6450 (0.9821)  loss_classifier: 0.0816 (0.1786)  loss_box_reg: 0.1494 (0.2506)  loss_mask: 0.3354 (0.4959)  loss_objectness: 0.0153 (0.0341)  loss_rpn_box_reg: 0.0119 (0.0229)  time: 2.1059  data: 0.0468  max mem: 7472
Epoch: [0]  [270/482]  eta: 0:07:23  lr: 0.002819  loss: 0.6738 (0.9781)  loss_classifier: 0.1087 (0.1784)  loss_box_reg: 0.1711 (0.2504)  loss_mask: 0.3463 (0.4899)  loss_objectness: 0.0163 (0.0361)  loss_rpn_box_reg: 0.0119 (0.0233)  time: 2.1858  data: 0.0666  max mem: 7472
Epoch: [0]  [280/482]  eta: 0:07:01  lr: 0.002923  loss: 0.6262 (0.9664)  loss_classifier: 0.0892 (0.1756)  loss_box_reg: 0.1449 (0.2462)  loss_mask: 0.3501 (0.4849)  loss_objectness: 0.0280 (0.0364)  loss_rpn_box_reg: 0.0087 (0.0232)  time: 2.1137  data: 0.0605  max mem: 7472
Epoch: [0]  [290/482]  eta: 0:06:40  lr: 0.003027  loss: 0.6054 (0.9550)  loss_classifier: 0.0817 (0.1727)  loss_box_reg: 0.0929 (0.2428)  loss_mask: 0.3019 (0.4783)  loss_objectness: 0.0440 (0.0382)  loss_rpn_box_reg: 0.0113 (0.0231)  time: 2.0563  data: 0.0434  max mem: 7472
Epoch: [0]  [300/482]  eta: 0:06:20  lr: 0.003131  loss: 0.6487 (0.9488)  loss_classifier: 0.0922 (0.1709)  loss_box_reg: 0.1191 (0.2404)  loss_mask: 0.3320 (0.4754)  loss_objectness: 0.0427 (0.0389)  loss_rpn_box_reg: 0.0121 (0.0232)  time: 2.0836  data: 0.0515  max mem: 7472
Epoch: [0]  [310/482]  eta: 0:05:59  lr: 0.003235  loss: 0.7082 (0.9449)  loss_classifier: 0.0969 (0.1694)  loss_box_reg: 0.1515 (0.2396)  loss_mask: 0.3683 (0.4714)  loss_objectness: 0.0424 (0.0398)  loss_rpn_box_reg: 0.0133 (0.0247)  time: 2.0858  data: 0.0690  max mem: 7472
Epoch: [0]  [320/482]  eta: 0:05:38  lr: 0.003338  loss: 0.7512 (0.9427)  loss_classifier: 0.0994 (0.1687)  loss_box_reg: 0.1919 (0.2402)  loss_mask: 0.3849 (0.4693)  loss_objectness: 0.0353 (0.0396)  loss_rpn_box_reg: 0.0357 (0.0250)  time: 2.1452  data: 0.0762  max mem: 7472
Epoch: [0]  [330/482]  eta: 0:05:18  lr: 0.003442  loss: 0.8277 (0.9388)  loss_classifier: 0.1592 (0.1682)  loss_box_reg: 0.2393 (0.2398)  loss_mask: 0.3809 (0.4659)  loss_objectness: 0.0306 (0.0397)  loss_rpn_box_reg: 0.0325 (0.0252)  time: 2.2168  data: 0.0701  max mem: 7472
Epoch: [0]  [340/482]  eta: 0:04:57  lr: 0.003546  loss: 0.6757 (0.9307)  loss_classifier: 0.1065 (0.1662)  loss_box_reg: 0.1474 (0.2375)  loss_mask: 0.3438 (0.4626)  loss_objectness: 0.0306 (0.0396)  loss_rpn_box_reg: 0.0133 (0.0248)  time: 2.1240  data: 0.0569  max mem: 7472
Epoch: [0]  [350/482]  eta: 0:04:35  lr: 0.003650  loss: 0.5709 (0.9201)  loss_classifier: 0.0669 (0.1634)  loss_box_reg: 0.1112 (0.2336)  loss_mask: 0.3369 (0.4597)  loss_objectness: 0.0229 (0.0388)  loss_rpn_box_reg: 0.0117 (0.0246)  time: 1.9926  data: 0.0469  max mem: 7472
Epoch: [0]  [360/482]  eta: 0:04:14  lr: 0.003754  loss: 0.6046 (0.9188)  loss_classifier: 0.1003 (0.1641)  loss_box_reg: 0.1324 (0.2334)  loss_mask: 0.3303 (0.4571)  loss_objectness: 0.0204 (0.0392)  loss_rpn_box_reg: 0.0121 (0.0250)  time: 2.0180  data: 0.0626  max mem: 7472
Epoch: [0]  [370/482]  eta: 0:03:53  lr: 0.003858  loss: 0.6006 (0.9078)  loss_classifier: 0.0758 (0.1615)  loss_box_reg: 0.0887 (0.2301)  loss_mask: 0.3290 (0.4529)  loss_objectness: 0.0181 (0.0386)  loss_rpn_box_reg: 0.0084 (0.0247)  time: 2.0244  data: 0.0655  max mem: 7472
Epoch: [0]  [380/482]  eta: 0:03:32  lr: 0.003962  loss: 0.5364 (0.9007)  loss_classifier: 0.0682 (0.1604)  loss_box_reg: 0.0931 (0.2289)  loss_mask: 0.2617 (0.4485)  loss_objectness: 0.0139 (0.0383)  loss_rpn_box_reg: 0.0050 (0.0247)  time: 2.0151  data: 0.0667  max mem: 7472
Epoch: [0]  [390/482]  eta: 0:03:11  lr: 0.004065  loss: 0.7302 (0.9014)  loss_classifier: 0.1498 (0.1610)  loss_box_reg: 0.2004 (0.2305)  loss_mask: 0.3288 (0.4462)  loss_objectness: 0.0307 (0.0386)  loss_rpn_box_reg: 0.0100 (0.0250)  time: 2.1033  data: 0.0753  max mem: 7472
Epoch: [0]  [400/482]  eta: 0:02:51  lr: 0.004169  loss: 0.7474 (0.8952)  loss_classifier: 0.1134 (0.1600)  loss_box_reg: 0.2025 (0.2292)  loss_mask: 0.3490 (0.4431)  loss_objectness: 0.0315 (0.0381)  loss_rpn_box_reg: 0.0130 (0.0248)  time: 2.1243  data: 0.0560  max mem: 7472
Epoch: [0]  [410/482]  eta: 0:02:30  lr: 0.004273  loss: 0.5860 (0.8895)  loss_classifier: 0.0841 (0.1584)  loss_box_reg: 0.1454 (0.2277)  loss_mask: 0.3100 (0.4401)  loss_objectness: 0.0135 (0.0377)  loss_rpn_box_reg: 0.0110 (0.0257)  time: 2.1124  data: 0.0537  max mem: 7472
Epoch: [0]  [420/482]  eta: 0:02:09  lr: 0.004377  loss: 0.6747 (0.8857)  loss_classifier: 0.0979 (0.1575)  loss_box_reg: 0.1829 (0.2272)  loss_mask: 0.3432 (0.4379)  loss_objectness: 0.0167 (0.0374)  loss_rpn_box_reg: 0.0182 (0.0256)  time: 2.0534  data: 0.0636  max mem: 7472
Epoch: [0]  [430/482]  eta: 0:01:48  lr: 0.004481  loss: 0.6763 (0.8817)  loss_classifier: 0.1123 (0.1566)  loss_box_reg: 0.2011 (0.2271)  loss_mask: 0.3441 (0.4354)  loss_objectness: 0.0201 (0.0370)  loss_rpn_box_reg: 0.0182 (0.0256)  time: 1.9787  data: 0.0590  max mem: 7472
Epoch: [0]  [440/482]  eta: 0:01:27  lr: 0.004585  loss: 0.5922 (0.8753)  loss_classifier: 0.0770 (0.1549)  loss_box_reg: 0.1138 (0.2247)  loss_mask: 0.3409 (0.4336)  loss_objectness: 0.0169 (0.0367)  loss_rpn_box_reg: 0.0143 (0.0255)  time: 2.0144  data: 0.0529  max mem: 7472
Epoch: [0]  [450/482]  eta: 0:01:06  lr: 0.004688  loss: 0.5922 (0.8718)  loss_classifier: 0.0838 (0.1539)  loss_box_reg: 0.1332 (0.2244)  loss_mask: 0.3409 (0.4317)  loss_objectness: 0.0113 (0.0363)  loss_rpn_box_reg: 0.0107 (0.0254)  time: 2.0415  data: 0.0510  max mem: 7472
Epoch: [0]  [460/482]  eta: 0:00:45  lr: 0.004792  loss: 0.7216 (0.8688)  loss_classifier: 0.1217 (0.1534)  loss_box_reg: 0.2018 (0.2241)  loss_mask: 0.3192 (0.4296)  loss_objectness: 0.0107 (0.0362)  loss_rpn_box_reg: 0.0123 (0.0254)  time: 2.0111  data: 0.0544  max mem: 7472
Epoch: [0]  [470/482]  eta: 0:00:24  lr: 0.004896  loss: 0.6759 (0.8650)  loss_classifier: 0.1234 (0.1532)  loss_box_reg: 0.1583 (0.2228)  loss_mask: 0.3192 (0.4277)  loss_objectness: 0.0207 (0.0359)  loss_rpn_box_reg: 0.0134 (0.0254)  time: 1.9929  data: 0.0480  max mem: 7472
Epoch: [0]  [480/482]  eta: 0:00:04  lr: 0.005000  loss: 0.6775 (0.8623)  loss_classifier: 0.1407 (0.1530)  loss_box_reg: 0.1559 (0.2217)  loss_mask: 0.3240 (0.4261)  loss_objectness: 0.0219 (0.0360)  loss_rpn_box_reg: 0.0148 (0.0254)  time: 1.9480  data: 0.0479  max mem: 7472
Epoch: [0]  [481/482]  eta: 0:00:02  lr: 0.005000  loss: 0.6555 (0.8611)  loss_classifier: 0.1407 (0.1528)  loss_box_reg: 0.1366 (0.2213)  loss_mask: 0.3131 (0.4257)  loss_objectness: 0.0219 (0.0359)  loss_rpn_box_reg: 0.0134 (0.0254)  time: 1.9022  data: 0.0456  max mem: 7472
Epoch: [0] Total time: 0:16:38 (2.0720 s / it)
creating index...
index created!
Test:  [  0/321]  eta: 0:03:07  model_time: 0.5358 (0.5358)  evaluator_time: 0.0336 (0.0336)  time: 0.5840  data: 0.0141  max mem: 7472
Test:  [100/321]  eta: 0:01:56  model_time: 0.4708 (0.4680)  evaluator_time: 0.0227 (0.0377)  time: 0.5279  data: 0.0188  max mem: 7472
Test:  [200/321]  eta: 0:01:04  model_time: 0.4536 (0.4670)  evaluator_time: 0.0357 (0.0438)  time: 0.5308  data: 0.0233  max mem: 7472
Test:  [300/321]  eta: 0:00:11  model_time: 0.4652 (0.4660)  evaluator_time: 0.0366 (0.0433)  time: 0.5570  data: 0.0269  max mem: 7472
Test:  [320/321]  eta: 0:00:00  model_time: 0.4608 (0.4663)  evaluator_time: 0.0370 (0.0441)  time: 0.5569  data: 0.0280  max mem: 7472
Test: Total time: 0:02:51 (0.5352 s / it)
Averaged stats: model_time: 0.4608 (0.4663)  evaluator_time: 0.0370 (0.0441)
Accumulating evaluation results...
DONE (t=0.13s).
Accumulating evaluation results...
DONE (t=0.14s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.338
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.665
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.296
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.208
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.402
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.488
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.140
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.405
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.485
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.374
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.545
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.614
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.295
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.630
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.158
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.355
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.466
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.126
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.356
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.422
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.304
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.487
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.553
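# illustrative: evaluate() returns a CocoEvaluator, so the headline numbers
# printed above can also be read programmatically (stats[0] is AP@[0.50:0.95])
print(f"bbox mAP: {res.coco_eval['bbox'].stats[0]:.3f}")
print(f"segm mAP: {res.coco_eval['segm'].stats[0]:.3f}")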
import matplotlib.pyplot as plt
plt.title('Training Loss')
# each entry in trn_history is a MetricLogger; average the window of
# smoothed loss values it tracked during that epoch
losses = [np.mean(list(trn_history[i].meters['loss'].deque)) for i in range(len(trn_history))]
plt.plot(losses)
model.eval()
im = dataset_test[10][0]
show(im)
with torch.no_grad():
    prediction = model([im.to(device)])
    for i in range(len(prediction[0]['masks'])):
        # each predicted mask is a 1xHxW soft probability map in [0, 1]
        plt.imshow(Image.fromarray(prediction[0]['masks'][i, 0].mul(255).byte().cpu().numpy()))
        plt.title('Class: '+str(prediction[0]['labels'][i].cpu().numpy())+' Score: '+str(prediction[0]['scores'][i].cpu().numpy()))
        plt.show()
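# illustrative post-processing (not in the original notebook): keep only
# confident detections and binarize the soft masks at 0.5 before overlaying
# them or computing metrics
keep = prediction[0]['scores'] > 0.5
final_masks = (prediction[0]['masks'][keep, 0] > 0.5).cpu().numpy()
subplots([im, *final_masks], sz=10)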