init

f6d38d05 · blu · 27cd40b2 · f6d38d05 · f6d38d05 · f6d38d05
--- a/opencv-yolo/deployment/docker/edet.Dockerfile
+++ b/opencv-yolo/deployment/docker/edet.Dockerfile
+# Image for the EfficientDet person-detection web service.
+FROM python:slim
+
+ENV MAINTAINER=Bruce.Lu
+WORKDIR /apps/app
+# apt-get is the script-stable front end; drop the package lists afterwards so
+# they do not end up in the image layer.
+RUN apt-get -qq update \
+    && apt-get install -y redis-server \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV BIN_PRE=/usr/local/bin/python
+# NOTE(review): detect_video.py is copied to /apps/app/ below, not to
+# /apps/app/web/ - confirm how start.sh / web.py combine BIN_DIR and BIN_NAME.
+ENV BIN_NAME=web/detect_video.py
+ENV DL_DIR=/data
+#ENV REDIS=
+ENV CFG_DIR=/apps/app/
+ENV BIN_DIR=/apps/app/
+
+# NOTE(review): detect_video.py imports the src/ package and loads a .pth
+# checkpoint; neither is copied by this file - confirm they are provided at
+# runtime (for example through a mounted volume).
+COPY opencv-yolo/web/web.py /apps/app/
+COPY opencv-yolo/web/detect_video.py /apps/app/
+
+COPY opencv-yolo/web/requirement.txt /apps/app/
+RUN pip install --no-cache-dir -r requirement.txt
+
+COPY opencv-yolo/web/start.sh /apps/app/
+
+EXPOSE 5555
+EXPOSE 5000
+CMD ["./start.sh"]
\ No newline at end of file
--- a/opencv-yolo/web/detect_video.py
+++ b/opencv-yolo/web/detect_video.py
+#!python
+import argparse
+import torch
+from src.config import COCO_CLASSES, colors
+import cv2, datetime
+import numpy as np
+
+def get_args():
+    """Parse the command-line options of the person-detection script.
+
+    Returns:
+        argparse.Namespace with ``image_size``, ``cls_threshold``,
+        ``nms_threshold``, ``pretrained_model``, ``input`` and ``output``.
+    """
+    # The text is a description; passing it positionally would set ``prog``
+    # and garble the usage line.
+    parser = argparse.ArgumentParser(
+        description="EfficientDet: Scalable and Efficient Object Detection implementation by Signatrix GmbH")
+    parser.add_argument("--image_size", type=int, default=512, help="The common width and height for all images")
+    parser.add_argument("--cls_threshold", type=float, default=0.5)
+    parser.add_argument("--nms_threshold", type=float, default=0.5)
+    parser.add_argument("-c", "--pretrained_model", type=str, default="signatrix_efficientdet_coco.pth")
+    # nargs="?" makes the positional optional so that its default is honoured;
+    # without it argparse always requires the argument and ignores ``default``.
+    parser.add_argument("input", type=str, nargs="?", default="input.mp4")
+    parser.add_argument("-o", "--output", type=str, default="detect_person.jpg")
+    return parser.parse_args()
+
+def test(opt):
+    """Scan a video for the first frame containing a person and save it.
+
+    Frames are read from ``opt.input`` one by one, normalised, resized so the
+    longer side equals ``opt.image_size``, zero-padded to a square and passed
+    through the model. On the first frame with at least one 'person'
+    detection scoring at least ``opt.cls_threshold``, every such detection is
+    drawn on a copy of the frame, the copy is written to
+    ``<opt.output>_<unix seconds>.jpg``, a one-line summary of the first
+    detection is printed, and scanning stops.
+
+    Args:
+        opt: namespace providing ``pretrained_model``, ``input``, ``output``,
+            ``image_size`` and ``cls_threshold``.
+    """
+    # NOTE(review): torch.load unpickles arbitrary Python objects; only load
+    # checkpoints from a trusted source.
+    model = torch.load(opt.pretrained_model, map_location='cpu').module
+    use_cuda = torch.cuda.is_available()
+    if use_cuda:
+        model.cuda()
+
+    # Seconds since the Unix epoch taken from an aware UTC clock, so the value
+    # does not shift with the host's local timezone.
+    ts = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
+    fname = opt.output + "_" + str(ts) + ".jpg"
+    bDetected = False
+    strDetMsg = ''
+    font = cv2.FONT_HERSHEY_PLAIN
+    color = (255, 0, 0)
+
+    cap = cv2.VideoCapture(opt.input)
+    try:
+        while cap.isOpened():
+            flag, image = cap.read()
+            if not flag:
+                break
+            # Keep the untouched frame for drawing; ``image`` becomes the
+            # network input below.
+            output_image = np.copy(image)
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+            height, width = image.shape[:2]
+            image = image.astype(np.float32) / 255
+            image[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
+            image[:, :, 1] = (image[:, :, 1] - 0.456) / 0.224
+            image[:, :, 2] = (image[:, :, 2] - 0.406) / 0.225
+            if height > width:
+                scale = opt.image_size / height
+                resized_height = opt.image_size
+                resized_width = int(width * scale)
+            else:
+                scale = opt.image_size / width
+                resized_height = int(height * scale)
+                resized_width = opt.image_size
+
+            image = cv2.resize(image, (resized_width, resized_height))
+
+            # Zero-pad to a square, then reorder to (1, channels, H, W).
+            new_image = np.zeros((opt.image_size, opt.image_size, 3))
+            new_image[0:resized_height, 0:resized_width] = image
+            new_image = np.transpose(new_image, (2, 0, 1))
+            new_image = new_image[None, :, :, :]
+            new_image = torch.Tensor(new_image)
+            if use_cuda:
+                new_image = new_image.cuda()
+            with torch.no_grad():
+                scores, labels, boxes = model(new_image)
+                # Map box coordinates back to the original frame size.
+                boxes /= scale
+            if boxes.shape[0] == 0:
+                continue
+
+            for box_id in range(boxes.shape[0]):
+                pred_prob = float(scores[box_id])
+                if pred_prob < opt.cls_threshold:
+                    continue
+                pred_label = int(labels[box_id])
+                if COCO_CLASSES[pred_label] != 'person':
+                    continue
+                xmin, ymin, xmax, ymax = boxes[box_id, :]
+                # OpenCV drawing calls need plain integer pixel coordinates,
+                # not tensor scalars.
+                left, top = int(xmin), int(ymin)
+                right, bottom = int(xmax), int(ymax)
+                # Size the label background from the exact text that is drawn.
+                label_text = COCO_CLASSES[pred_label] + ': %.3f' % pred_prob
+                text_size = cv2.getTextSize(label_text, font, 1, 1)[0]
+                cv2.rectangle(output_image, (left, top), (right, bottom), color, 1)
+                cv2.rectangle(output_image, (left, top),
+                              (left + text_size[0] + 1, top + text_size[1] + 1), color, -1)
+                cv2.putText(
+                    output_image, label_text,
+                    (left, top + text_size[1] + 1), font, 1,
+                    (255, 255, 255), 1)
+                if not bDetected:
+                    # Only the first qualifying detection is reported.
+                    strDetMsg = "edet found human {:.3f} x: {}, y: {}, w: {}, h: {}; written image: {}".format(pred_prob, int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin), fname)
+                    bDetected = True
+            if bDetected:
+                cv2.imwrite(fname, output_image)
+                print(strDetMsg)
+                break
+    finally:
+        # Release the capture even when decoding or inference raises.
+        cap.release()
+
+if __name__ == "__main__":
+    import os
+
+    opt = get_args()
+    # Strip only a real file extension. Slicing at rfind(".") chopped the last
+    # character of extension-less names and emptied names such as "./out/img".
+    opt.output = os.path.splitext(opt.output)[0]
+    test(opt)
--- a/opencv-yolo/web/edet_model.pth
+++ b/opencv-yolo/web/edet_model.pth
--- a/opencv-yolo/web/requirement.txt
+++ b/opencv-yolo/web/requirement.txt
+flask
+cerberus
+celery
+flower
+paho-mqtt
+azure-storage-file-share
+pyyaml
+redis
+efficientnet_pytorch
+tensorboardX
+pycocotools
\ No newline at end of file
--- a/opencv-yolo/web/src/config.py
+++ b/opencv-yolo/web/src/config.py
+# Class names, indexed by the integer label predicted by the detector
+# (80 entries; index 0 is "person").
+COCO_CLASSES = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
+                "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog",
+                "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
+                "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
+                "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
+                "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
+                "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
+                "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+                "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
+                "teddy bear", "hair drier", "toothbrush"]
+
+# Palette of 3-component colour tuples, looked up by class label when drawing.
+# Holds 100 entries, i.e. more than the 80 names in COCO_CLASSES.
+# NOTE(review): channel order (BGR vs RGB) is not established here - confirm.
+colors = [(39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), (14, 89, 122),
+          (80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, 132), (169, 158, 85), (188, 185, 26), (103, 1, 17),
+          (82, 144, 81), (92, 7, 184), (49, 81, 155), (179, 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60),
+          (16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, 159), (182, 173, 32), (34, 113, 133), (90, 135, 34),
+          (53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, 112), (89, 60, 55), (15, 54, 88), (112, 75, 181),
+          (42, 147, 38), (138, 52, 63), (128, 65, 149), (106, 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108),
+          (52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, 148), (182, 101, 89), (44, 65, 179), (1, 33, 26),
+          (122, 164, 26), (70, 63, 134), (137, 106, 82), (120, 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50),
+          (56, 50, 20), (2, 22, 177), (156, 100, 106), (21, 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33),
+          (105, 118, 30), (7, 185, 124), (46, 34, 146), (105, 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91),
+          (31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, 106), (113, 68, 54), (136, 116, 112), (119, 139, 130),
+          (31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, 180), (49, 119, 155), (153, 50, 183), (125, 38, 3),
+          (129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, 148), (28, 144, 118), (29, 9, 24), (175, 45, 108),
+          (81, 175, 64), (178, 19, 157), (74, 188, 190), (18, 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95),
+          (2, 20, 184), (122, 37, 185)]
--- a/opencv-yolo/web/src/dataset.py
+++ b/opencv-yolo/web/src/dataset.py
+import os
+import torch
+import numpy as np
+
+from torch.utils.data import Dataset, DataLoader
+from pycocotools.coco import COCO
+import cv2
+
+
+class CocoDataset(Dataset):
+    """Detection dataset backed by a COCO-style annotation file.
+
+    Each item is a dict with ``'img'`` (float32 RGB array scaled to [0, 1])
+    and ``'annot'`` (array of ``[x1, y1, x2, y2, label]`` rows), optionally
+    passed through ``transform``.
+    """
+
+    def __init__(self, root_dir, set='train2017', transform=None):
+        """Load the annotation index for one split.
+
+        Args:
+            root_dir: directory holding ``annotations/`` and ``images/``.
+            set: split name; selects ``annotations/instances_<set>.json`` and
+                ``images/<set>/``. (The name shadows the builtin but is kept
+                so keyword callers keep working.)
+            transform: optional callable applied to every sample dict.
+        """
+        self.root_dir = root_dir
+        self.set_name = set
+        self.transform = transform
+
+        self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json'))
+        self.image_ids = self.coco.getImgIds()
+
+        self.load_classes()
+
+    def load_classes(self):
+        """Build the lookup tables between COCO category ids, labels and names."""
+        # Sort by category id so that label numbering is deterministic.
+        categories = self.coco.loadCats(self.coco.getCatIds())
+        categories.sort(key=lambda x: x['id'])
+
+        self.classes = {}              # name -> contiguous label
+        self.coco_labels = {}          # contiguous label -> COCO category id
+        self.coco_labels_inverse = {}  # COCO category id -> contiguous label
+        for c in categories:
+            self.coco_labels[len(self.classes)] = c['id']
+            self.coco_labels_inverse[c['id']] = len(self.classes)
+            self.classes[c['name']] = len(self.classes)
+
+        # Reverse mapping: contiguous label -> name.
+        self.labels = {}
+        for key, value in self.classes.items():
+            self.labels[value] = key
+
+    def __len__(self):
+        """Return the number of images in the split."""
+        return len(self.image_ids)
+
+    def __getitem__(self, idx):
+        """Return the (optionally transformed) sample at position ``idx``."""
+        img = self.load_image(idx)
+        annot = self.load_annotations(idx)
+        sample = {'img': img, 'annot': annot}
+        if self.transform:
+            sample = self.transform(sample)
+        return sample
+
+    def load_image(self, image_index):
+        """Read image ``image_index`` from disk as float32 RGB in [0, 1]."""
+        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
+        path = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name'])
+        img = cv2.imread(path)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+        return img.astype(np.float32) / 255.
+
+    def load_annotations(self, image_index):
+        """Return the non-crowd boxes of an image as ``[x1, y1, x2, y2, label]`` rows.
+
+        Boxes narrower or shorter than one pixel are dropped. The result has
+        shape ``(0, 5)`` when nothing remains.
+        """
+        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
+
+        # Some images have no annotations at all.
+        if len(annotations_ids) == 0:
+            return np.zeros((0, 5))
+
+        # Collect the rows in a list and stack once; appending to an array
+        # inside the loop re-copies the whole array for every annotation.
+        rows = []
+        for a in self.coco.loadAnns(annotations_ids):
+            # Some annotations have basically no width / height, skip them.
+            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
+                continue
+
+            row = np.zeros(5)
+            row[:4] = a['bbox']
+            row[4] = self.coco_label_to_label(a['category_id'])
+            rows.append(row)
+
+        annotations = np.stack(rows, axis=0) if rows else np.zeros((0, 5))
+
+        # Transform from [x, y, w, h] to [x1, y1, x2, y2].
+        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
+        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]
+
+        return annotations
+
+    def coco_label_to_label(self, coco_label):
+        """Map a COCO category id to the contiguous label."""
+        return self.coco_labels_inverse[coco_label]
+
+    def label_to_coco_label(self, label):
+        """Map a contiguous label back to the COCO category id."""
+        return self.coco_labels[label]
+
+    def num_classes(self):
+        """Return the class count (hard-coded to 80, not derived from the file)."""
+        return 80
+
+
+def collater(data):
+    """Merge a list of samples into one batch dict.
+
+    Images are stacked and moved to channels-first layout. Annotation sets
+    are padded with rows of -1 up to the largest count in the batch (at least
+    one row), and the per-sample scales are returned as a list.
+    """
+    images = [sample['img'] for sample in data]
+    annots = [sample['annot'] for sample in data]
+    scales = [sample['scale'] for sample in data]
+
+    stacked = torch.from_numpy(np.stack(images, axis=0))
+
+    # Width of the padded annotation tensor; never below one row so that a
+    # batch without any boxes still yields a (batch, 1, 5) tensor of -1.
+    longest = max(annot.shape[0] for annot in annots)
+    annot_padded = torch.ones((len(annots), max(longest, 1), 5)) * -1
+
+    for row, annot in enumerate(annots):
+        count = annot.shape[0]
+        if count > 0:
+            annot_padded[row, :count, :] = annot
+
+    # (batch, H, W, C) -> (batch, C, H, W)
+    return {'img': stacked.permute(0, 3, 1, 2), 'annot': annot_padded, 'scale': scales}
+
+
+class Resizer(object):
+    """Resize a sample so its longer side is ``common_size`` and pad to a square.
+
+    The resized image is placed in the top-left corner of a zero canvas, the
+    box coordinates are multiplied by the same scale (in place), and both are
+    returned as tensors together with the scale.
+    """
+
+    def __call__(self, sample, common_size=512):
+        image = sample['img']
+        annots = sample['annot']
+        height, width, _ = image.shape
+
+        # Scale by whichever side is longer so the result fits the canvas.
+        portrait = height > width
+        scale = common_size / (height if portrait else width)
+        if portrait:
+            resized_height, resized_width = common_size, int(width * scale)
+        else:
+            resized_height, resized_width = int(height * scale), common_size
+
+        canvas = np.zeros((common_size, common_size, 3))
+        canvas[0:resized_height, 0:resized_width] = cv2.resize(image, (resized_width, resized_height))
+
+        annots[:, :4] *= scale
+
+        return {'img': torch.from_numpy(canvas), 'annot': torch.from_numpy(annots), 'scale': scale}
+
+
+class Augmenter(object):
+    """Randomly mirror a sample left-to-right, boxes included."""
+
+    def __call__(self, sample, flip_x=0.5):
+        # One random draw per call; the sample is returned untouched unless
+        # the draw falls below the flip probability.
+        if not np.random.rand() < flip_x:
+            return sample
+
+        annots = sample['annot']
+        mirrored = sample['img'][:, ::-1, :]
+        _, cols, _ = mirrored.shape
+
+        # Snapshot both x columns before overwriting either of them.
+        left = annots[:, 0].copy()
+        right = annots[:, 2].copy()
+        annots[:, 0] = cols - right
+        annots[:, 2] = cols - left
+
+        return {'img': mirrored, 'annot': annots}
+
+
+class Normalizer(object):
+    """Standardise image channels with fixed per-channel mean and std."""
+
+    def __init__(self):
+        # Shaped (1, 1, 3) so they broadcast over an (H, W, 3) image.
+        self.mean = np.array([[[0.485, 0.456, 0.406]]])
+        self.std = np.array([[[0.229, 0.224, 0.225]]])
+
+    def __call__(self, sample):
+        pixels = sample['img'].astype(np.float32)
+        standardised = (pixels - self.mean) / self.std
+        return {'img': standardised, 'annot': sample['annot']}
--- a/opencv-yolo/web/src/loss.py
+++ b/opencv-yolo/web/src/loss.py
+import torch
+import torch.nn as nn
+
+
+def calc_iou(a, b):
+    """Return the pairwise IoU between two sets of ``[x1, y1, x2, y2]`` boxes.
+
+    ``a`` has N rows and ``b`` has M rows; the result has shape (N, M).
+    """
+    # Column vectors of ``a`` so every row broadcasts against all rows of ``b``.
+    a_x1 = torch.unsqueeze(a[:, 0], 1)
+    a_y1 = torch.unsqueeze(a[:, 1], 1)
+    a_x2 = torch.unsqueeze(a[:, 2], dim=1)
+    a_y2 = torch.unsqueeze(a[:, 3], dim=1)
+
+    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
+
+    # Overlap extent along each axis; negative means disjoint, so clamp to 0.
+    overlap_w = torch.clamp(torch.min(a_x2, b[:, 2]) - torch.max(a_x1, b[:, 0]), min=0)
+    overlap_h = torch.clamp(torch.min(a_y2, b[:, 3]) - torch.max(a_y1, b[:, 1]), min=0)
+    intersection = overlap_w * overlap_h
+
+    area_a = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1)
+    # Lower bound keeps the division finite for degenerate boxes.
+    union = torch.clamp(area_a + area_b - intersection, min=1e-8)
+
+    return intersection / union
+
+
+class FocalLoss(nn.Module):
+    """Focal classification loss plus smooth-L1 box regression loss.
+
+    Anchors are matched to ground-truth boxes by IoU: below 0.4 is
+    background, at or above 0.5 is positive, anything in between is ignored
+    by the classification loss.
+    """
+
+    def __init__(self):
+        super(FocalLoss, self).__init__()
+
+    def forward(self, classifications, regressions, anchors, annotations):
+        """Compute the batch-averaged classification and regression losses.
+
+        Args:
+            classifications: per-anchor class probabilities, indexed
+                ``[image, anchor, class]``.
+            regressions: per-anchor box deltas, indexed ``[image, anchor, 4]``.
+            anchors: anchor boxes as ``[x1, y1, x2, y2]``; only ``anchors[0]``
+                is used and shared by every image in the batch.
+            annotations: ground-truth rows ``[x1, y1, x2, y2, label]`` indexed
+                ``[image, box, 5]``; rows whose label is -1 are padding.
+
+        Returns:
+            Tuple ``(classification_loss, regression_loss)``, each a tensor
+            with a single element.
+        """
+        alpha = 0.25
+        gamma = 2.0
+        # Helper tensors are created on the device of the predictions rather
+        # than "CUDA whenever it is available", so inputs that live on the CPU
+        # of a CUDA-capable host no longer cause a device mismatch.
+        device = classifications.device
+        batch_size = classifications.shape[0]
+        classification_losses = []
+        regression_losses = []
+
+        anchor = anchors[0, :, :]
+
+        anchor_widths = anchor[:, 2] - anchor[:, 0]
+        anchor_heights = anchor[:, 3] - anchor[:, 1]
+        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
+        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights
+
+        for j in range(batch_size):
+
+            classification = classifications[j, :, :]
+            regression = regressions[j, :, :]
+
+            # Drop the -1 padding rows.
+            bbox_annotation = annotations[j, :, :]
+            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]
+
+            if bbox_annotation.shape[0] == 0:
+                # No ground truth for this image: it contributes zero loss.
+                regression_losses.append(torch.tensor(0., device=device))
+                classification_losses.append(torch.tensor(0., device=device))
+                continue
+
+            # Keep probabilities away from 0 and 1 so the logs stay finite.
+            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
+
+            IoU = calc_iou(anchor, bbox_annotation[:, :4])
+
+            IoU_max, IoU_argmax = torch.max(IoU, dim=1)
+
+            # Classification targets: -1 = ignore, 0 = background, 1 = object.
+            targets = torch.ones(classification.shape, device=device) * -1
+
+            targets[torch.lt(IoU_max, 0.4), :] = 0
+
+            positive_indices = torch.ge(IoU_max, 0.5)
+
+            num_positive_anchors = positive_indices.sum()
+
+            assigned_annotations = bbox_annotation[IoU_argmax, :]
+
+            targets[positive_indices, :] = 0
+            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
+
+            alpha_factor = torch.ones(targets.shape, device=device) * alpha
+
+            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
+            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
+            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)
+
+            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
+
+            cls_loss = focal_weight * bce
+
+            # Zero out the entries marked "ignore".
+            zeros = torch.zeros(cls_loss.shape, device=device)
+            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)
+
+            # Normalise by the positive count, but never divide by zero.
+            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
+
+            if num_positive_anchors > 0:
+                assigned_annotations = assigned_annotations[positive_indices, :]
+
+                anchor_widths_pi = anchor_widths[positive_indices]
+                anchor_heights_pi = anchor_heights[positive_indices]
+                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
+                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]
+
+                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
+                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
+                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
+                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights
+
+                # Avoid log(0) for degenerate ground-truth boxes.
+                gt_widths = torch.clamp(gt_widths, min=1)
+                gt_heights = torch.clamp(gt_heights, min=1)
+
+                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
+                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
+                targets_dw = torch.log(gt_widths / anchor_widths_pi)
+                targets_dh = torch.log(gt_heights / anchor_heights_pi)
+
+                reg_targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
+                reg_targets = reg_targets.t()
+
+                norm = torch.tensor([[0.1, 0.1, 0.2, 0.2]], device=device)
+                reg_targets = reg_targets / norm
+
+                regression_diff = torch.abs(reg_targets - regression[positive_indices, :])
+
+                # Smooth L1: quadratic below 1/9, linear above.
+                regression_loss = torch.where(
+                    torch.le(regression_diff, 1.0 / 9.0),
+                    0.5 * 9.0 * torch.pow(regression_diff, 2),
+                    regression_diff - 0.5 / 9.0
+                )
+                regression_losses.append(regression_loss.mean())
+            else:
+                regression_losses.append(torch.tensor(0., device=device))
+
+        return (torch.stack(classification_losses).mean(dim=0, keepdim=True),
+                torch.stack(regression_losses).mean(dim=0, keepdim=True))
--- a/opencv-yolo/web/src/model.py
+++ b/opencv-yolo/web/src/model.py
+import torch.nn as nn
+import torch
+import math
+from efficientnet_pytorch import EfficientNet as EffNet
+from src.utils import BBoxTransform, ClipBoxes, Anchors
+from src.loss import FocalLoss
+from torchvision.ops.boxes import nms as nms_torch
+
+
+def nms(dets, thresh):
+    """Run torchvision NMS on rows of ``[x1, y1, x2, y2, score]``.
+
+    Returns whatever ``torchvision.ops.boxes.nms`` returns for the given IoU
+    threshold.
+    """
+    boxes = dets[:, :4]
+    scores = dets[:, 4]
+    return nms_torch(boxes, scores, thresh)
+
+
+class ConvBlock(nn.Module):
+    """Depthwise 3x3 conv, pointwise 1x1 conv, batch norm and ReLU, same channels in and out."""
+
+    def __init__(self, num_channels):
+        super(ConvBlock, self).__init__()
+        # groups == channels makes the 3x3 convolution act per channel.
+        depthwise = nn.Conv2d(num_channels, num_channels, kernel_size=3, stride=1, padding=1, groups=num_channels)
+        pointwise = nn.Conv2d(num_channels, num_channels, kernel_size=1, stride=1, padding=0)
+        batch_norm = nn.BatchNorm2d(num_features=num_channels, momentum=0.9997, eps=4e-5)
+        self.conv = nn.Sequential(depthwise, pointwise, batch_norm, nn.ReLU())
+
+    def forward(self, input):
+        return self.conv(input)
+
+
+class BiFPN(nn.Module):
+    """One bidirectional feature-fusion layer over five pyramid levels (P3-P7).
+
+    Each fusion node combines its inputs with learnable, ReLU-clamped weights
+    that are normalised by their sum plus ``epsilon``, then applies a
+    ``ConvBlock``. Takes and returns a 5-tuple ``(p3, p4, p5, p6, p7)``.
+    """
+
+    def __init__(self, num_channels, epsilon=1e-4):
+        super(BiFPN, self).__init__()
+        # Added to the weight sum so the normalisation never divides by zero.
+        self.epsilon = epsilon
+        # Conv layers
+        self.conv6_up = ConvBlock(num_channels)
+        self.conv5_up = ConvBlock(num_channels)
+        self.conv4_up = ConvBlock(num_channels)
+        self.conv3_up = ConvBlock(num_channels)
+        self.conv4_down = ConvBlock(num_channels)
+        self.conv5_down = ConvBlock(num_channels)
+        self.conv6_down = ConvBlock(num_channels)
+        self.conv7_down = ConvBlock(num_channels)
+
+        # Feature scaling layers
+        self.p6_upsample = nn.Upsample(scale_factor=2, mode='nearest')
+        self.p5_upsample = nn.Upsample(scale_factor=2, mode='nearest')
+        self.p4_upsample = nn.Upsample(scale_factor=2, mode='nearest')
+        self.p3_upsample = nn.Upsample(scale_factor=2, mode='nearest')
+
+        self.p4_downsample = nn.MaxPool2d(kernel_size=2)
+        self.p5_downsample = nn.MaxPool2d(kernel_size=2)
+        self.p6_downsample = nn.MaxPool2d(kernel_size=2)
+        self.p7_downsample = nn.MaxPool2d(kernel_size=2)
+
+        # Weight
+        # Learnable fusion weights, one scalar per input of each node:
+        # *_w1 for the top-down pass (two inputs each), *_w2 for the
+        # bottom-up pass (three inputs, except P7 which has two).
+        self.p6_w1 = nn.Parameter(torch.ones(2))
+        self.p6_w1_relu = nn.ReLU()
+        self.p5_w1 = nn.Parameter(torch.ones(2))
+        self.p5_w1_relu = nn.ReLU()
+        self.p4_w1 = nn.Parameter(torch.ones(2))
+        self.p4_w1_relu = nn.ReLU()
+        self.p3_w1 = nn.Parameter(torch.ones(2))
+        self.p3_w1_relu = nn.ReLU()
+
+        self.p4_w2 = nn.Parameter(torch.ones(3))
+        self.p4_w2_relu = nn.ReLU()
+        self.p5_w2 = nn.Parameter(torch.ones(3))
+        self.p5_w2_relu = nn.ReLU()
+        self.p6_w2 = nn.Parameter(torch.ones(3))
+        self.p6_w2_relu = nn.ReLU()
+        self.p7_w2 = nn.Parameter(torch.ones(2))
+        self.p7_w2_relu = nn.ReLU()
+
+    def forward(self, inputs):
+        """
+        Fuse the five input levels top-down (P7 -> P3), then bottom-up (P3 -> P7).
+
+            P7_0 -------------------------- P7_2 -------->
+
+            P6_0 ---------- P6_1 ---------- P6_2 -------->
+
+            P5_0 ---------- P5_1 ---------- P5_2 -------->
+
+            P4_0 ---------- P4_1 ---------- P4_2 -------->
+
+            P3_0 -------------------------- P3_2 -------->
+        """
+
+        # P3_0, P4_0, P5_0, P6_0 and P7_0
+        p3_in, p4_in, p5_in, p6_in, p7_in = inputs
+        # Top-down pass: P7_0 feeds P6_1, which feeds P5_1, and so on down to P3_2
+        # Weights for P6_0 and P7_0 to P6_1
+        p6_w1 = self.p6_w1_relu(self.p6_w1)
+        weight = p6_w1 / (torch.sum(p6_w1, dim=0) + self.epsilon)
+        # Connections for P6_0 and P7_0 to P6_1 respectively
+        p6_up = self.conv6_up(weight[0] * p6_in + weight[1] * self.p6_upsample(p7_in))
+        # Weights for P5_0 and P6_0 to P5_1
+        p5_w1 = self.p5_w1_relu(self.p5_w1)
+        weight = p5_w1 / (torch.sum(p5_w1, dim=0) + self.epsilon)
+        # Connections for P5_0 and P6_0 to P5_1 respectively
+        p5_up = self.conv5_up(weight[0] * p5_in + weight[1] * self.p5_upsample(p6_up))
+        # Weights for P4_0 and P5_0 to P4_1
+        p4_w1 = self.p4_w1_relu(self.p4_w1)
+        weight = p4_w1 / (torch.sum(p4_w1, dim=0) + self.epsilon)
+        # Connections for P4_0 and P5_0 to P4_1 respectively
+        p4_up = self.conv4_up(weight[0] * p4_in + weight[1] * self.p4_upsample(p5_up))
+
+        # Weights for P3_0 and P4_1 to P3_2
+        p3_w1 = self.p3_w1_relu(self.p3_w1)
+        weight = p3_w1 / (torch.sum(p3_w1, dim=0) + self.epsilon)
+        # Connections for P3_0 and P4_1 to P3_2 respectively
+        p3_out = self.conv3_up(weight[0] * p3_in + weight[1] * self.p3_upsample(p4_up))
+
+        # Bottom-up pass: each output also receives the downsampled output below it
+        # Weights for P4_0, P4_1 and P3_2 to P4_2
+        p4_w2 = self.p4_w2_relu(self.p4_w2)
+        weight = p4_w2 / (torch.sum(p4_w2, dim=0) + self.epsilon)
+        # Connections for P4_0, P4_1 and P3_2 to P4_2 respectively
+        p4_out = self.conv4_down(
+            weight[0] * p4_in + weight[1] * p4_up + weight[2] * self.p4_downsample(p3_out))
+        # Weights for P5_0, P5_1 and P4_2 to P5_2
+        p5_w2 = self.p5_w2_relu(self.p5_w2)
+        weight = p5_w2 / (torch.sum(p5_w2, dim=0) + self.epsilon)
+        # Connections for P5_0, P5_1 and P4_2 to P5_2 respectively
+        p5_out = self.conv5_down(
+            weight[0] * p5_in + weight[1] * p5_up + weight[2] * self.p5_downsample(p4_out))
+        # Weights for P6_0, P6_1 and P5_2 to P6_2
+        p6_w2 = self.p6_w2_relu(self.p6_w2)
+        weight = p6_w2 / (torch.sum(p6_w2, dim=0) + self.epsilon)
+        # Connections for P6_0, P6_1 and P5_2 to P6_2 respectively
+        p6_out = self.conv6_down(
+            weight[0] * p6_in + weight[1] * p6_up + weight[2] * self.p6_downsample(p5_out))
+        # Weights for P7_0 and P6_2 to P7_2
+        p7_w2 = self.p7_w2_relu(self.p7_w2)
+        weight = p7_w2 / (torch.sum(p7_w2, dim=0) + self.epsilon)
+        # Connections for P7_0 and P6_2 to P7_2
+        p7_out = self.conv7_down(weight[0] * p7_in + weight[1] * self.p7_downsample(p6_out))
+
+        return p3_out, p4_out, p5_out, p6_out, p7_out
+
+
+class Regressor(nn.Module):
+    """Box-regression head: a stack of 3x3 conv + ReLU layers and a 4-values-per-anchor output conv."""
+
+    def __init__(self, in_channels, num_anchors, num_layers):
+        super(Regressor, self).__init__()
+        tower = []
+        for _ in range(num_layers):
+            tower += [
+                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1),
+                nn.ReLU(True),
+            ]
+        self.layers = nn.Sequential(*tower)
+        self.header = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, inputs):
+        """Return box deltas reshaped to (batch, positions * anchors, 4)."""
+        deltas = self.header(self.layers(inputs))
+        # Channels last, so the four values of each anchor are contiguous.
+        deltas = deltas.permute(0, 2, 3, 1)
+        batch = deltas.shape[0]
+        return deltas.contiguous().view(batch, -1, 4)
+
+
+class Classifier(nn.Module):
+    """Classification head: a stack of 3x3 conv + ReLU layers, an output conv and a sigmoid."""
+
+    def __init__(self, in_channels, num_anchors, num_classes, num_layers):
+        super(Classifier, self).__init__()
+        self.num_anchors = num_anchors
+        self.num_classes = num_classes
+        tower = []
+        for _ in range(num_layers):
+            tower += [
+                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1),
+                nn.ReLU(True),
+            ]
+        self.layers = nn.Sequential(*tower)
+        self.header = nn.Conv2d(in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
+        self.act = nn.Sigmoid()
+
+    def forward(self, inputs):
+        """Return class probabilities reshaped to (batch, positions * anchors, num_classes)."""
+        probs = self.act(self.header(self.layers(inputs)))
+        # Channels last: the last axis then holds num_anchors * num_classes
+        # values per position, so splitting it per anchor and flattening
+        # positions with anchors is a single view on the contiguous tensor.
+        probs = probs.permute(0, 2, 3, 1).contiguous()
+        return probs.view(probs.shape[0], -1, self.num_classes)
+
+
+class EfficientNet(nn.Module):
+    """Pretrained EfficientNet-B0 used as a feature extractor."""
+
+    def __init__(self):
+        super(EfficientNet, self).__init__()
+        backbone = EffNet.from_pretrained('efficientnet-b0')
+        # forward() below never calls the head layers, so drop them.
+        for unused in ('_conv_head', '_bn1', '_avg_pooling', '_dropout', '_fc'):
+            delattr(backbone, unused)
+        self.model = backbone
+
+    def forward(self, x):
+        """Return the outputs of the stride-2 blocks, skipping the first one."""
+        net = self.model
+        x = net._swish(net._bn0(net._conv_stem(x)))
+        blocks = net._blocks
+        feature_maps = []
+        for idx, block in enumerate(blocks):
+            # Drop-connect rate grows linearly with the block index.
+            rate = net._global_params.drop_connect_rate
+            if rate:
+                rate *= float(idx) / len(blocks)
+            x = block(x, drop_connect_rate=rate)
+            if block._depthwise_conv.stride == [2, 2]:
+                feature_maps.append(x)
+
+        return feature_maps[1:]
+
+
+class EfficientDet(nn.Module):
+    """EfficientDet detector: EfficientNet backbone, stacked BiFPN layers and two heads.
+
+    Called with a 2-element list/tuple ``[images, annotations]`` it returns
+    the ``(classification_loss, regression_loss)`` pair from ``FocalLoss``.
+    Called with an image batch tensor it returns ``[scores, labels, boxes]``
+    for the first image of the batch after score filtering and NMS.
+    """
+
+    def __init__(self, num_anchors=9, num_classes=20, compound_coef=0):
+        super(EfficientDet, self).__init__()
+        self.compound_coef = compound_coef
+
+        # BiFPN width for each compound coefficient.
+        self.num_channels = [64, 88, 112, 160, 224, 288, 384, 384][self.compound_coef]
+
+        # Project backbone features to the BiFPN width; P6 and P7 are made
+        # from the last backbone map with stride-2 convolutions.
+        self.conv3 = nn.Conv2d(40, self.num_channels, kernel_size=1, stride=1, padding=0)
+        self.conv4 = nn.Conv2d(80, self.num_channels, kernel_size=1, stride=1, padding=0)
+        self.conv5 = nn.Conv2d(192, self.num_channels, kernel_size=1, stride=1, padding=0)
+        self.conv6 = nn.Conv2d(192, self.num_channels, kernel_size=3, stride=2, padding=1)
+        self.conv7 = nn.Sequential(nn.ReLU(),
+                                   nn.Conv2d(self.num_channels, self.num_channels, kernel_size=3, stride=2, padding=1))
+
+        self.bifpn = nn.Sequential(*[BiFPN(self.num_channels) for _ in range(min(2 + self.compound_coef, 8))])
+
+        self.num_classes = num_classes
+        self.regressor = Regressor(in_channels=self.num_channels, num_anchors=num_anchors,
+                                   num_layers=3 + self.compound_coef // 3)
+        self.classifier = Classifier(in_channels=self.num_channels, num_anchors=num_anchors, num_classes=num_classes,
+                                     num_layers=3 + self.compound_coef // 3)
+
+        self.anchors = Anchors()
+        self.regressBoxes = BBoxTransform()
+        self.clipBoxes = ClipBoxes()
+        self.focalLoss = FocalLoss()
+
+        # Initialise every layer created so far. The backbone is attached
+        # only after this loop, so its pretrained weights are not overwritten.
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+        # Bias the classifier so that every anchor starts with probability
+        # ``prior`` of being an object.
+        prior = 0.01
+
+        self.classifier.header.weight.data.fill_(0)
+        self.classifier.header.bias.data.fill_(-math.log((1.0 - prior) / prior))
+
+        self.regressor.header.weight.data.fill_(0)
+        self.regressor.header.bias.data.fill_(0)
+
+        self.backbone_net = EfficientNet()
+
+    def freeze_bn(self):
+        """Put every BatchNorm2d layer into eval mode."""
+        for m in self.modules():
+            if isinstance(m, nn.BatchNorm2d):
+                m.eval()
+
+    def forward(self, inputs):
+        """Run the detector in training or inference mode (see class docstring).
+
+        Args:
+            inputs: ``[images, annotations]`` (list or tuple) for training, or
+                an image batch tensor for inference.
+        """
+        # Only a real 2-element sequence selects training mode. A bare
+        # ``len(inputs) == 2`` test also matched an image tensor with batch
+        # size 2 and unpacked its two images as (images, annotations).
+        if isinstance(inputs, (list, tuple)) and len(inputs) == 2:
+            is_training = True
+            img_batch, annotations = inputs
+        else:
+            is_training = False
+            img_batch = inputs
+
+        c3, c4, c5 = self.backbone_net(img_batch)
+        p3 = self.conv3(c3)
+        p4 = self.conv4(c4)
+        p5 = self.conv5(c5)
+        p6 = self.conv6(c5)
+        p7 = self.conv7(p6)
+
+        features = [p3, p4, p5, p6, p7]
+        features = self.bifpn(features)
+
+        regression = torch.cat([self.regressor(feature) for feature in features], dim=1)
+        classification = torch.cat([self.classifier(feature) for feature in features], dim=1)
+        anchors = self.anchors(img_batch)
+
+        if is_training:
+            return self.focalLoss(classification, regression, anchors, annotations)
+        else:
+            transformed_anchors = self.regressBoxes(anchors, regression)
+            transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)
+
+            scores = torch.max(classification, dim=2, keepdim=True)[0]
+
+            # Pre-filter on the best class score; only image 0 is considered.
+            scores_over_thresh = (scores > 0.05)[0, :, 0]
+
+            if scores_over_thresh.sum() == 0:
+                # No candidates: return empty scores, labels and boxes.
+                return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
+
+            classification = classification[:, scores_over_thresh, :]
+            transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
+            scores = scores[:, scores_over_thresh, :]
+
+            anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5)
+
+            nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
+
+            return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
+
+
+if __name__ == '__main__':
+    # Smoke check: build the 80-class model and print its trainable size.
+    from tensorboardX import SummaryWriter
+
+    def count_parameters(model):
+        """Return the total element count of the parameters that require grad."""
+        trainable_sizes = [
+            param.numel() for param in model.parameters() if param.requires_grad
+        ]
+        return sum(trainable_sizes)
+
+    model = EfficientDet(num_classes=80)
+    print(count_parameters(model))
--- a/opencv-yolo/web/src/utils.py
+++ b/opencv-yolo/web/src/utils.py
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+class BBoxTransform(nn.Module):
+    """Decode regression deltas against reference boxes.
+
+    Boxes are read as ``(x1, y1, x2, y2)`` along the last dimension.  Deltas
+    are de-normalised with ``delta * std + mean``; the first two shift the box
+    centre proportionally to the box width/height, the last two rescale the
+    width/height through ``exp``.
+    """
+
+    def __init__(self, mean=None, std=None):
+        """Store the de-normalisation statistics.
+
+        Args:
+            mean: Four per-coordinate offsets; defaults to zeros.
+            std: Four per-coordinate scales; defaults to
+                ``[0.1, 0.1, 0.2, 0.2]``.
+        """
+        super(BBoxTransform, self).__init__()
+        if mean is None:
+            self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32))
+        else:
+            self.mean = mean
+        if std is None:
+            self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32))
+        else:
+            self.std = std
+        if torch.cuda.is_available():
+            self.mean = self.mean.cuda()
+            self.std = self.std.cuda()
+
+    def forward(self, boxes, deltas):
+        """Return the boxes obtained by applying ``deltas`` to ``boxes``.
+
+        Args:
+            boxes: Tensor indexed as ``[:, :, 0..3]`` holding x1, y1, x2, y2.
+            deltas: Tensor indexed as ``[:, :, 0..3]`` holding dx, dy, dw, dh.
+
+        Returns:
+            Tensor with the decoded ``(x1, y1, x2, y2)`` stacked on dim 2.
+        """
+        # The statistics are plain attributes, so ``module.to(...)`` never
+        # moves them, and ``__init__`` forces them onto CUDA whenever CUDA
+        # exists.  Align them with the incoming deltas so CPU inference on a
+        # CUDA-capable host does not fail with a device mismatch.  This is a
+        # no-op when the device already matches.
+        mean = torch.as_tensor(self.mean, device=deltas.device)
+        std = torch.as_tensor(self.std, device=deltas.device)
+
+        widths = boxes[:, :, 2] - boxes[:, :, 0]
+        heights = boxes[:, :, 3] - boxes[:, :, 1]
+        ctr_x = boxes[:, :, 0] + 0.5 * widths
+        ctr_y = boxes[:, :, 1] + 0.5 * heights
+
+        dx = deltas[:, :, 0] * std[0] + mean[0]
+        dy = deltas[:, :, 1] * std[1] + mean[1]
+        dw = deltas[:, :, 2] * std[2] + mean[2]
+        dh = deltas[:, :, 3] * std[3] + mean[3]
+
+        # Centre moves by a fraction of the box size; size scales by exp(delta).
+        pred_ctr_x = ctr_x + dx * widths
+        pred_ctr_y = ctr_y + dy * heights
+        pred_w = torch.exp(dw) * widths
+        pred_h = torch.exp(dh) * heights
+
+        pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
+        pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
+        pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
+        pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h
+
+        pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)
+
+        return pred_boxes
+
+
+class ClipBoxes(nn.Module):
+    """Clamp ``(x1, y1, x2, y2)`` boxes against the extent of an image batch."""
+
+    def __init__(self):
+        super(ClipBoxes, self).__init__()
+
+    def forward(self, boxes, img):
+        """Clip ``boxes`` in place and return the same tensor.
+
+        Columns 0 and 1 are bounded below by 0; column 2 is bounded above by
+        the image width and column 3 by the image height.  No other bounds
+        are applied.
+
+        Args:
+            boxes: Tensor indexed as ``[:, :, 0..3]``.
+            img: Tensor whose shape unpacks into exactly four dimensions, the
+                last two being height and width.
+        """
+        _, _, img_height, img_width = img.shape
+
+        for column in (0, 1):
+            boxes[:, :, column] = torch.clamp(boxes[:, :, column], min=0)
+
+        for column, upper in ((2, img_width), (3, img_height)):
+            boxes[:, :, column] = torch.clamp(boxes[:, :, column], max=upper)
+
+        return boxes
+
+
+class Anchors(nn.Module):
+    """Generate the anchor boxes of every pyramid level for an image batch."""
+
+    def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None):
+        """Configure the anchor layout.
+
+        Every argument falls back to a default when left as ``None``; an
+        explicitly supplied value is stored and used as given.
+
+        Args:
+            pyramid_levels: Pyramid level exponents; default ``[3, 4, 5, 6, 7]``.
+            strides: One stride per level; default ``2 ** level``.
+            sizes: One base anchor size per level; default ``2 ** (level + 2)``.
+            ratios: Ratios passed to ``generate_anchors``; default
+                ``[0.5, 1, 2]``.
+            scales: Scales passed to ``generate_anchors``; default
+                ``[2**0, 2**(1/3), 2**(2/3)]``.
+        """
+        super(Anchors, self).__init__()
+
+        # Previously only the ``None`` branches assigned the attributes, so any
+        # caller-supplied value was dropped and later access raised
+        # AttributeError.  Always assign, defaulting only when needed.
+        if pyramid_levels is None:
+            pyramid_levels = [3, 4, 5, 6, 7]
+        self.pyramid_levels = pyramid_levels
+
+        # Derived defaults follow the (possibly custom) pyramid levels.
+        if strides is None:
+            strides = [2 ** x for x in self.pyramid_levels]
+        self.strides = strides
+
+        if sizes is None:
+            sizes = [2 ** (x + 2) for x in self.pyramid_levels]
+        self.sizes = sizes
+
+        if ratios is None:
+            ratios = [0.5, 1, 2]
+        self.ratios = np.array(ratios)
+
+        if scales is None:
+            scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
+        self.scales = np.array(scales)
+
+    def forward(self, image):
+        """Return all anchors for ``image`` as a ``(1, N, 4)`` float32 tensor.
+
+        Args:
+            image: Batch whose ``shape[2:]`` is the spatial size used to derive
+                the per-level feature-map sizes.
+
+        Returns:
+            Tensor of anchors, levels concatenated in ``pyramid_levels`` order.
+            It lives on ``image``'s device when ``image`` is a tensor;
+            otherwise on CUDA when available, else on the CPU.
+        """
+        image_shape = np.array(image.shape[2:])
+        # Ceil-divide the spatial size by 2 ** level for every pyramid level.
+        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]
+
+        # Seed with an empty (0, 4) block so zero levels still yield (1, 0, 4),
+        # then join once instead of re-allocating with np.append per level.
+        per_level = [np.zeros((0, 4), dtype=np.float32)]
+        for idx, level_shape in enumerate(image_shapes):
+            anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
+            per_level.append(shift(level_shape, self.strides[idx], anchors))
+        all_anchors = np.concatenate(per_level, axis=0)
+
+        all_anchors = np.expand_dims(all_anchors, axis=0)
+
+        anchors = torch.from_numpy(all_anchors.astype(np.float32))
+        if isinstance(image, torch.Tensor):
+            # Follow the input so CPU inference also works on CUDA-capable hosts.
+            return anchors.to(image.device)
+        if torch.cuda.is_available():
+            anchors = anchors.cuda()
+        return anchors
+
+
+def generate_anchors(base_size=16, ratios=None, scales=None):
+    """Build the origin-centred reference anchors for one pyramid level.
+
+    One anchor is produced per (ratio, scale) pair, with ratios varying
+    slowest.  Each anchor has area ``(base_size * scale) ** 2`` and
+    ``height / width == ratio``.
+
+    Args:
+        base_size: Side length multiplied by every scale.
+        ratios: Height/width ratios; defaults to ``[0.5, 1, 2]``.
+        scales: Scale multipliers; defaults to
+            ``[2**0, 2**(1/3), 2**(2/3)]``.
+
+    Returns:
+        Array of shape ``(len(ratios) * len(scales), 4)`` holding
+        ``(x1, y1, x2, y2)`` rows centred on the origin.
+    """
+    if ratios is None:
+        ratios = np.array([0.5, 1, 2])
+
+    if scales is None:
+        scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
+
+    ratio_count = len(ratios)
+    scale_count = len(scales)
+    boxes = np.zeros((ratio_count * scale_count, 4))
+
+    # Start from squares: both size columns get base_size * scale.
+    scaled_sides = base_size * np.tile(scales, ratio_count)
+    boxes[:, 2:] = scaled_sides[:, np.newaxis]
+
+    # Reshape each square to its ratio while keeping its area.
+    square_areas = boxes[:, 2] * boxes[:, 3]
+    ratio_per_box = np.repeat(ratios, scale_count)
+    boxes[:, 2] = np.sqrt(square_areas / ratio_per_box)
+    boxes[:, 3] = boxes[:, 2] * ratio_per_box
+
+    # Turn (0, 0, w, h) into (-w/2, -h/2, w/2, h/2).
+    half_widths = boxes[:, 2] * 0.5
+    boxes[:, 0] -= half_widths
+    boxes[:, 2] -= half_widths
+    half_heights = boxes[:, 3] * 0.5
+    boxes[:, 1] -= half_heights
+    boxes[:, 3] -= half_heights
+
+    return boxes
+
+
+def compute_shape(image_shape, pyramid_levels):
+    """Return the ceil-divided spatial size for each pyramid level.
+
+    Args:
+        image_shape: Sequence whose first two entries are the spatial size.
+        pyramid_levels: Level exponents; level ``x`` divides by ``2 ** x``.
+
+    Returns:
+        List with one array per level, in ``pyramid_levels`` order.
+    """
+    spatial_size = np.array(image_shape[:2])
+    level_shapes = []
+    for level in pyramid_levels:
+        divisor = 2 ** level
+        # Adding (divisor - 1) before floor division rounds up.
+        level_shapes.append((spatial_size + divisor - 1) // divisor)
+    return level_shapes
+
+
+def shift(shape, stride, anchors):
+    """Replicate ``anchors`` at the centre of every cell of a feature map.
+
+    Cell centres are ``(index + 0.5) * stride``, iterated row by row with x
+    varying fastest.  All anchors of one cell are emitted before moving on to
+    the next cell.
+
+    Args:
+        shape: ``(rows, cols)`` of the feature map.
+        stride: Distance between neighbouring cell centres.
+        anchors: Array of ``A`` reference boxes with four coordinates each.
+
+    Returns:
+        Array of shape ``(rows * cols * A, 4)``.
+    """
+    centers_x = (np.arange(0, shape[1]) + 0.5) * stride
+    centers_y = (np.arange(0, shape[0]) + 0.5) * stride
+    grid_x, grid_y = np.meshgrid(centers_x, centers_y)
+
+    flat_x = grid_x.ravel()
+    flat_y = grid_y.ravel()
+    # Each (x, y) offset is applied to both corners of a box.
+    offsets = np.stack([flat_x, flat_y, flat_x, flat_y], axis=1)
+
+    anchor_count = anchors.shape[0]
+    cell_count = offsets.shape[0]
+
+    # Broadcast (1, A, 4) against (K, 1, 4) to get every cell/anchor pair.
+    per_cell = anchors.reshape((1, anchor_count, 4)) + offsets.reshape((cell_count, 1, 4))
+
+    return per_cell.reshape((cell_count * anchor_count, 4))