提交 f6d38d05 authored 作者: blu's avatar blu

init

上级 27cd40b2
# Image for the opencv-yolo web service: Python runtime plus a local redis-server.
# NOTE(review): the base tag is not pinned to a Python version; pin it (e.g. a
# specific X.Y-slim tag) once the version the dependencies are tested on is known.
FROM python:slim
ENV MAINTAINER=Bruce.Lu
WORKDIR /apps/app

# apt-get (not apt) is the stable scripting interface; update, install and list
# cleanup stay in one layer so the package index never ends up in the image.
RUN apt-get -qq update \
    && apt-get install -y --no-install-recommends redis-server \
    && rm -rf /var/lib/apt/lists/*

ENV BIN_PRE=/usr/local/bin/python
# NOTE(review): detect_video.py is copied to /apps/app/ below, not to a web/
# sub-directory; confirm that start.sh resolves this relative path as intended.
ENV BIN_NAME=web/detect_video.py
ENV DL_DIR=/data
#ENV REDIS=
ENV CFG_DIR=/apps/app/
ENV BIN_DIR=/apps/app/

# Install dependencies before copying the sources so that code-only changes
# reuse the cached dependency layer.
COPY opencv-yolo/web/requirement.txt /apps/app/
RUN pip install --no-cache-dir -r requirement.txt

COPY opencv-yolo/web/web.py /apps/app/
COPY opencv-yolo/web/detect_video.py /apps/app/
COPY opencv-yolo/web/start.sh /apps/app/

EXPOSE 5555
EXPOSE 5000
CMD ["./start.sh"]
\ No newline at end of file
#!python
import argparse
import torch
from src.config import COCO_CLASSES, colors
import cv2, datetime
import numpy as np
def get_args():
    """Parse the command-line options of the person-detection script.

    Returns:
        argparse.Namespace with the attributes ``image_size``, ``cls_threshold``,
        ``nms_threshold``, ``pretrained_model``, ``input`` and ``output``.
    """
    parser = argparse.ArgumentParser(
        "EfficientDet: Scalable and Efficient Object Detection implementation by Signatrix GmbH")
    parser.add_argument("--image_size", type=int, default=512, help="The common width and height for all images")
    parser.add_argument("--cls_threshold", type=float, default=0.5)
    parser.add_argument("--nms_threshold", type=float, default=0.5)
    parser.add_argument("-c", "--pretrained_model", type=str, default="signatrix_efficientdet_coco.pth")
    # nargs="?" makes the positional optional; without it argparse always
    # requires the argument and the declared default can never take effect.
    parser.add_argument("input", type=str, nargs="?", default="input.mp4")
    parser.add_argument("-o", "--output", type=str, default="detect_person.jpg")
    args = parser.parse_args()
    return args
def test(opt):
    """Scan a video and save the first frame in which a person is detected.

    Frames are read from ``opt.input`` one by one, normalised, resized so the
    longer side equals ``opt.image_size``, zero-padded to a square and passed
    through the model loaded from ``opt.pretrained_model``. Every detection
    labelled 'person' with a score of at least ``opt.cls_threshold`` is drawn
    on a copy of the frame. The first frame with such a detection is written to
    ``<opt.output>_<unix-seconds>.jpg``, a one-line summary is printed and
    scanning stops.

    Args:
        opt: namespace as produced by ``get_args``; ``opt.output`` is expected
            to already have its extension removed.
    """
    # NOTE(review): torch.load unpickles arbitrary objects; only load model
    # files from a trusted source.
    model = torch.load(opt.pretrained_model, map_location='cpu').module
    if torch.cuda.is_available():
        model.cuda()

    # True Unix time. The previous code subtracted a UTC epoch from local
    # wall-clock time, which skews the value by the host's UTC offset.
    ts = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
    fname = opt.output + "_" + str(ts) + ".jpg"

    bDetected = False
    strDetMsg = ''
    color = (255, 0, 0)
    font = cv2.FONT_HERSHEY_PLAIN

    cap = cv2.VideoCapture(opt.input)
    try:
        while cap.isOpened():
            flag, image = cap.read()
            if not flag:
                break
            # Keep the untouched BGR frame for drawing; `image` becomes the model input.
            output_image = np.copy(image)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            height, width = image.shape[:2]

            # Per-channel normalisation with the mean/std constants used by the model.
            image = image.astype(np.float32) / 255
            image[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
            image[:, :, 1] = (image[:, :, 1] - 0.456) / 0.224
            image[:, :, 2] = (image[:, :, 2] - 0.406) / 0.225

            # Resize so the longer side equals image_size, keeping the aspect ratio.
            if height > width:
                scale = opt.image_size / height
                resized_height = opt.image_size
                resized_width = int(width * scale)
            else:
                scale = opt.image_size / width
                resized_height = int(height * scale)
                resized_width = opt.image_size
            image = cv2.resize(image, (resized_width, resized_height))

            # Zero-pad to a square, then convert HWC -> 1xCxHxW.
            new_image = np.zeros((opt.image_size, opt.image_size, 3))
            new_image[0:resized_height, 0:resized_width] = image
            new_image = np.transpose(new_image, (2, 0, 1))
            new_image = new_image[None, :, :, :]
            new_image = torch.Tensor(new_image)
            if torch.cuda.is_available():
                new_image = new_image.cuda()

            with torch.no_grad():
                scores, labels, boxes = model(new_image)
                # Map the boxes back to the coordinates of the original frame.
                boxes /= scale
            if boxes.shape[0] == 0:
                continue

            for box_id in range(boxes.shape[0]):
                pred_prob = float(scores[box_id])
                if pred_prob < opt.cls_threshold:
                    continue
                pred_label = int(labels[box_id])
                if COCO_CLASSES[pred_label] != 'person':
                    continue
                # OpenCV drawing functions need plain Python ints, not tensor elements.
                xmin, ymin, xmax, ymax = (int(v) for v in boxes[box_id, :])
                # Measure exactly the caption that is drawn so the filled
                # background matches the text.
                caption = COCO_CLASSES[pred_label] + ': %.3f' % pred_prob
                text_size = cv2.getTextSize(caption, font, 1, 1)[0]
                cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), color, 1)
                cv2.rectangle(output_image, (xmin, ymin),
                              (xmin + text_size[0] + 1, ymin + text_size[1] + 1), color, -1)
                cv2.putText(
                    output_image, caption,
                    (xmin, ymin + text_size[1] + 1), font, 1,
                    (255, 255, 255), 1)
                if not bDetected:
                    # The summary describes the first qualifying detection only.
                    strDetMsg = "edet found human {:.3f} x: {}, y: {}, w: {}, h: {}; written image: {}".format(
                        pred_prob, xmin, ymin, xmax - xmin, ymax - ymin, fname)
                    bDetected = True

            if bDetected:
                cv2.imwrite(fname, output_image)
                print(strDetMsg)
                break
    finally:
        # Release the capture even when decoding or inference raises.
        cap.release()
if __name__ == "__main__":
    opt = get_args()
    # Strip the extension from the output name; test() appends a timestamp and
    # ".jpg". A name without any "." is kept as-is (the previous slice with
    # rfind() == -1 silently dropped its last character).
    stem, dot, _ext = opt.output.rpartition(".")
    if dot:
        opt.output = stem
    test(opt)
flask
cerberus
celery
flower
paho-mqtt
azure-storage-file-share
pyyaml
redis
efficientnet_pytorch
tensorboardX
pycocotools
\ No newline at end of file
# Class names, indexed by the integer label the detector returns
# (detect_video.py looks up COCO_CLASSES[pred_label]).
COCO_CLASSES = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
"traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog",
"horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
"handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
"wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
"broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
"bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
"teddy bear", "hair drier", "toothbrush"]
# Drawing colours as 3-tuples of ints, looked up by class label
# (detect_video.py reads colors[pred_label]).
# NOTE(review): channel order (BGR vs RGB) is not established here — confirm against the drawing code.
colors = [(39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), (14, 89, 122),
(80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, 132), (169, 158, 85), (188, 185, 26), (103, 1, 17),
(82, 144, 81), (92, 7, 184), (49, 81, 155), (179, 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60),
(16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, 159), (182, 173, 32), (34, 113, 133), (90, 135, 34),
(53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, 112), (89, 60, 55), (15, 54, 88), (112, 75, 181),
(42, 147, 38), (138, 52, 63), (128, 65, 149), (106, 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108),
(52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, 148), (182, 101, 89), (44, 65, 179), (1, 33, 26),
(122, 164, 26), (70, 63, 134), (137, 106, 82), (120, 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50),
(56, 50, 20), (2, 22, 177), (156, 100, 106), (21, 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33),
(105, 118, 30), (7, 185, 124), (46, 34, 146), (105, 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91),
(31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, 106), (113, 68, 54), (136, 116, 112), (119, 139, 130),
(31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, 180), (49, 119, 155), (153, 50, 183), (125, 38, 3),
(129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, 148), (28, 144, 118), (29, 9, 24), (175, 45, 108),
(81, 175, 64), (178, 19, 157), (74, 188, 190), (18, 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95),
(2, 20, 184), (122, 37, 185)]
import os
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from pycocotools.coco import COCO
import cv2
class CocoDataset(Dataset):
    """Dataset over a COCO-style directory.

    Annotations are read from ``<root_dir>/annotations/instances_<set>.json``
    and images from ``<root_dir>/images/<set>/``. Each item is a dict with
    ``'img'`` (float32 RGB array scaled to [0, 1]) and ``'annot'`` (an ``(N, 5)``
    array of ``[x1, y1, x2, y2, label]``), optionally passed through ``transform``.
    """

    def __init__(self, root_dir, set='train2017', transform=None):
        """Load the annotation index and build the class-label mappings.

        Args:
            root_dir: dataset root containing ``annotations/`` and ``images/``.
            set: split name used in the annotation file name and image folder.
            transform: optional callable applied to every sample dict.
        """
        self.root_dir = root_dir
        self.set_name = set
        self.transform = transform
        self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json'))
        self.image_ids = self.coco.getImgIds()
        self.load_classes()

    def load_classes(self):
        """Build the name/label lookup tables from the dataset's categories.

        Categories are sorted by their COCO id and numbered consecutively from 0.
        """
        # load class names (name -> label)
        categories = self.coco.loadCats(self.coco.getCatIds())
        categories.sort(key=lambda x: x['id'])

        self.classes = {}               # class name -> contiguous label
        self.coco_labels = {}           # contiguous label -> COCO category id
        self.coco_labels_inverse = {}   # COCO category id -> contiguous label
        for c in categories:
            self.coco_labels[len(self.classes)] = c['id']
            self.coco_labels_inverse[c['id']] = len(self.classes)
            self.classes[c['name']] = len(self.classes)

        # also load the reverse (label -> name)
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

    def __len__(self):
        """Return the number of images in the split."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return the (optionally transformed) sample dict for image ``idx``."""
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)
        return sample

    def load_image(self, image_index):
        """Read image ``image_index`` as a float32 RGB array scaled to [0, 1].

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
        path = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name'])
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None; fail with the path
            # instead of an opaque cvtColor assertion further down.
            raise FileNotFoundError("cannot read image: " + path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return img.astype(np.float32) / 255.

    def load_annotations(self, image_index):
        """Return the non-crowd boxes of image ``image_index``.

        Returns:
            ``(N, 5)`` float array of ``[x1, y1, x2, y2, label]``; ``(0, 5)``
            when the image has no annotation ids.
        """
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)

        # some images appear to miss annotations
        if len(annotations_ids) == 0:
            return np.zeros((0, 5))

        # parse annotations; rows are collected in a list and stacked once
        # instead of re-allocating the whole array for every box.
        rows = []
        for a in self.coco.loadAnns(annotations_ids):
            # some annotations have basically no width / height, skip them
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue
            row = np.zeros(5)
            row[:4] = a['bbox']
            row[4] = self.coco_label_to_label(a['category_id'])
            rows.append(row)
        annotations = np.stack(rows, axis=0) if rows else np.zeros((0, 5))

        # transform from [x, y, w, h] to [x1, y1, x2, y2]
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]
        return annotations

    def coco_label_to_label(self, coco_label):
        """Map a COCO category id to the contiguous training label."""
        return self.coco_labels_inverse[coco_label]

    def label_to_coco_label(self, label):
        """Map a contiguous training label back to its COCO category id."""
        return self.coco_labels[label]

    def num_classes(self):
        """Return the fixed class count (80), independent of the loaded categories."""
        return 80
def collater(data):
    """Merge a list of sample dicts into one batch dict.

    Images are stacked and moved to channel-first layout. Annotation arrays are
    padded with rows of -1 up to the largest count in the batch (at least one
    row), so every sample contributes the same shape.

    Returns:
        dict with ``'img'`` (B, C, H, W), ``'annot'`` (B, max(N, 1), 5) and
        ``'scale'`` (list of the per-sample scale values).
    """
    batch_images = torch.from_numpy(np.stack([item['img'] for item in data], axis=0))
    boxes_per_sample = [item['annot'] for item in data]
    scale_factors = [item['scale'] for item in data]

    most_boxes = max(boxes.shape[0] for boxes in boxes_per_sample)
    # A batch without any box still gets a single all -1 row per sample.
    padded = torch.full((len(boxes_per_sample), max(most_boxes, 1), 5), -1.0)
    for row, boxes in enumerate(boxes_per_sample):
        count = boxes.shape[0]
        if count > 0:
            padded[row, :count, :] = boxes

    return {'img': batch_images.permute(0, 3, 1, 2), 'annot': padded, 'scale': scale_factors}
class Resizer(object):
    """Resize a sample so its longer side equals ``common_size`` and zero-pad it to a square."""

    def __call__(self, sample, common_size=512):
        """Return the resized sample as tensors together with the scale applied.

        The box coordinates (first four annotation columns) are multiplied by
        the same scale, in place on the incoming array.
        """
        picture = sample['img']
        boxes = sample['annot']
        rows, cols, _ = picture.shape

        # Scale by the longer side; the shorter side is truncated to an int.
        if rows > cols:
            factor = common_size / rows
            target_rows, target_cols = common_size, int(cols * factor)
        else:
            factor = common_size / cols
            target_rows, target_cols = int(rows * factor), common_size

        shrunk = cv2.resize(picture, (target_cols, target_rows))
        canvas = np.zeros((common_size, common_size, 3))
        canvas[0:target_rows, 0:target_cols] = shrunk  # top-left aligned, rest stays zero

        boxes[:, :4] *= factor
        return {'img': torch.from_numpy(canvas), 'annot': torch.from_numpy(boxes), 'scale': factor}
class Augmenter(object):
    """Randomly mirror a sample horizontally."""

    def __call__(self, sample, flip_x=0.5):
        """Flip image and boxes left-right when a uniform draw falls below ``flip_x``.

        Without a flip the incoming sample object is returned unchanged. With a
        flip, a new dict with only ``'img'`` and ``'annot'`` is returned and the
        annotation array is updated in place.
        """
        if not (np.random.rand() < flip_x):
            return sample

        mirrored = sample['img'][:, ::-1, :]
        boxes = sample['annot']
        _, width, _ = mirrored.shape

        # Snapshot both x columns first: each one is rewritten from the other.
        old_left = boxes[:, 0].copy()
        old_right = boxes[:, 2].copy()
        boxes[:, 0] = width - old_right
        boxes[:, 2] = width - old_left

        return {'img': mirrored, 'annot': boxes}
class Normalizer(object):
    """Standardise an image with fixed per-channel mean and standard deviation."""

    def __init__(self):
        # Shaped (1, 1, 3) so they broadcast over a height x width x channel image.
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):
        """Return a new dict with the normalised image and the same annotation array."""
        centered = sample['img'].astype(np.float32) - self.mean
        return {'img': centered / self.std, 'annot': sample['annot']}
import torch
import torch.nn as nn
def calc_iou(a, b):
    """Pairwise intersection-over-union between two sets of ``[x1, y1, x2, y2]`` boxes.

    Args:
        a: tensor of shape (N, 4).
        b: tensor of shape (M, 4).

    Returns:
        Tensor of shape (N, M); entry (i, j) is the IoU of ``a[i]`` and ``b[j]``.
    """
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # Broadcasting (N, 1) against (M,) yields the (N, M) overlap extents.
    overlap_w = torch.min(a[:, 2].unsqueeze(1), b[:, 2]) - torch.max(a[:, 0].unsqueeze(1), b[:, 0])
    overlap_h = torch.min(a[:, 3].unsqueeze(1), b[:, 3]) - torch.max(a[:, 1].unsqueeze(1), b[:, 1])
    overlap_w = overlap_w.clamp(min=0)
    overlap_h = overlap_h.clamp(min=0)

    intersection = overlap_w * overlap_h
    # The floor keeps the division defined for degenerate boxes.
    union = (area_a.unsqueeze(1) + area_b - intersection).clamp(min=1e-8)
    return intersection / union
class FocalLoss(nn.Module):
    """Focal classification loss plus smooth-L1 box regression loss over anchors."""

    def __init__(self):
        super(FocalLoss, self).__init__()

    def forward(self, classifications, regressions, anchors, annotations):
        """Compute the batch-averaged classification and regression losses.

        Args:
            classifications: (B, A, C) per-anchor class probabilities.
            regressions: (B, A, 4) per-anchor box deltas.
            anchors: (1, A, 4) anchor boxes ``[x1, y1, x2, y2]``; only index 0 is used.
            annotations: (B, N, 5) boxes ``[x1, y1, x2, y2, label]``; rows whose
                label is -1 are treated as padding.

        Returns:
            Tuple ``(classification_loss, regression_loss)``, each of shape (1,).
        """
        alpha = 0.25
        gamma = 2.0
        # Helper tensors follow the input's device. Choosing by
        # torch.cuda.is_available() breaks CPU inputs on a CUDA-capable host.
        device = classifications.device
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]
        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                # A sample without ground truth contributes zero to both losses.
                regression_losses.append(torch.tensor(0).float().to(device))
                classification_losses.append(torch.tensor(0).float().to(device))
                continue

            # Keep probabilities away from 0 and 1 so the logs below stay finite.
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(anchor, bbox_annotation[:, :4])
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            # Target encoding: -1 = ignored, 0 = negative, 1 = positive class.
            targets = torch.ones(classification.shape, device=device) * -1
            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)
            num_positive_anchors = positive_indices.sum()
            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            alpha_factor = torch.ones(targets.shape, device=device) * alpha
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
            cls_loss = focal_weight * bce

            # Anchors still marked -1 (IoU between 0.4 and 0.5) do not contribute.
            zeros = torch.zeros(cls_loss.shape, device=device)
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)
            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # Centres use the raw sizes; only the log targets need sizes >= 1.
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                norm = torch.tensor([[0.1, 0.1, 0.2, 0.2]], device=device)
                targets = targets / norm

                regression_diff = torch.abs(targets - regression[positive_indices, :])

                # Smooth L1 with the quadratic/linear switch at 1/9.
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                regression_losses.append(torch.tensor(0).float().to(device))

        return (torch.stack(classification_losses).mean(dim=0, keepdim=True),
                torch.stack(regression_losses).mean(dim=0, keepdim=True))
import torch.nn as nn
import torch
import math
from efficientnet_pytorch import EfficientNet as EffNet
from src.utils import BBoxTransform, ClipBoxes, Anchors
from src.loss import FocalLoss
from torchvision.ops.boxes import nms as nms_torch
def nms(dets, thresh):
    """Run torchvision's NMS on detections stored as ``[x1, y1, x2, y2, score]`` rows.

    Returns whatever ``torchvision.ops.boxes.nms`` returns for the split
    boxes/scores and the given IoU threshold.
    """
    boxes = dets[:, :4]
    scores = dets[:, 4]
    return nms_torch(boxes, scores, thresh)
class ConvBlock(nn.Module):
    """Depthwise 3x3 conv, pointwise 1x1 conv, batch norm and ReLU with a constant channel count."""

    def __init__(self, num_channels):
        super().__init__()
        # groups == channels makes the first convolution depthwise.
        depthwise = nn.Conv2d(num_channels, num_channels, kernel_size=3, stride=1, padding=1,
                              groups=num_channels)
        pointwise = nn.Conv2d(num_channels, num_channels, kernel_size=1, stride=1, padding=0)
        batch_norm = nn.BatchNorm2d(num_features=num_channels, momentum=0.9997, eps=4e-5)
        self.conv = nn.Sequential(depthwise, pointwise, batch_norm, nn.ReLU())

    def forward(self, input):
        """Apply the block; the spatial size and channel count are unchanged."""
        return self.conv(input)
class BiFPN(nn.Module):
    """One bidirectional feature-pyramid layer over five levels (P3..P7).

    Each fusion node combines its inputs with learnable weights that are passed
    through ReLU and normalised by their sum plus ``epsilon``.
    """

    def __init__(self, num_channels, epsilon=1e-4):
        super().__init__()
        self.epsilon = epsilon

        # Conv layers. Attribute names and creation order are kept as they are
        # part of the module's parameter layout.
        for name in ('conv6_up', 'conv5_up', 'conv4_up', 'conv3_up',
                     'conv4_down', 'conv5_down', 'conv6_down', 'conv7_down'):
            setattr(self, name, ConvBlock(num_channels))

        # Feature scaling layers
        for level in (6, 5, 4, 3):
            setattr(self, 'p%d_upsample' % level, nn.Upsample(scale_factor=2, mode='nearest'))
        for level in (4, 5, 6, 7):
            setattr(self, 'p%d_downsample' % level, nn.MaxPool2d(kernel_size=2))

        # Fusion weights: one scalar per input of the node, each with its own ReLU.
        for name, fan_in in (('p6_w1', 2), ('p5_w1', 2), ('p4_w1', 2), ('p3_w1', 2),
                             ('p4_w2', 3), ('p5_w2', 3), ('p6_w2', 3), ('p7_w2', 2)):
            setattr(self, name, nn.Parameter(torch.ones(fan_in)))
            setattr(self, name + '_relu', nn.ReLU())

    def _fusion_weights(self, raw, relu):
        """Return the ReLU-ed weights divided by their sum plus epsilon."""
        positive = relu(raw)
        return positive / (torch.sum(positive, dim=0) + self.epsilon)

    def forward(self, inputs):
        """
        P7_0 -------------------------- P7_2 -------->
        P6_0 ---------- P6_1 ---------- P6_2 -------->
        P5_0 ---------- P5_1 ---------- P5_2 -------->
        P4_0 ---------- P4_1 ---------- P4_2 -------->
        P3_0 -------------------------- P3_2 -------->
        """
        p3_in, p4_in, p5_in, p6_in, p7_in = inputs

        # Top-down pass: P7_0 -> P6_1 -> P5_1 -> P4_1 -> P3_2
        w = self._fusion_weights(self.p6_w1, self.p6_w1_relu)
        p6_up = self.conv6_up(w[0] * p6_in + w[1] * self.p6_upsample(p7_in))

        w = self._fusion_weights(self.p5_w1, self.p5_w1_relu)
        p5_up = self.conv5_up(w[0] * p5_in + w[1] * self.p5_upsample(p6_up))

        w = self._fusion_weights(self.p4_w1, self.p4_w1_relu)
        p4_up = self.conv4_up(w[0] * p4_in + w[1] * self.p4_upsample(p5_up))

        w = self._fusion_weights(self.p3_w1, self.p3_w1_relu)
        p3_out = self.conv3_up(w[0] * p3_in + w[1] * self.p3_upsample(p4_up))

        # Bottom-up pass: P3_2 -> P4_2 -> P5_2 -> P6_2 -> P7_2. The middle
        # levels fuse the raw input, the top-down result and the level below.
        w = self._fusion_weights(self.p4_w2, self.p4_w2_relu)
        p4_out = self.conv4_down(
            w[0] * p4_in + w[1] * p4_up + w[2] * self.p4_downsample(p3_out))

        w = self._fusion_weights(self.p5_w2, self.p5_w2_relu)
        p5_out = self.conv5_down(
            w[0] * p5_in + w[1] * p5_up + w[2] * self.p5_downsample(p4_out))

        w = self._fusion_weights(self.p6_w2, self.p6_w2_relu)
        p6_out = self.conv6_down(
            w[0] * p6_in + w[1] * p6_up + w[2] * self.p6_downsample(p5_out))

        # P7 has no top-down node, so only two inputs are fused.
        w = self._fusion_weights(self.p7_w2, self.p7_w2_relu)
        p7_out = self.conv7_down(w[0] * p7_in + w[1] * self.p7_downsample(p6_out))

        return p3_out, p4_out, p5_out, p6_out, p7_out
class Regressor(nn.Module):
    """Convolutional head predicting four box deltas per anchor and location."""

    def __init__(self, in_channels, num_anchors, num_layers):
        super().__init__()
        tower = []
        for _ in range(num_layers):
            tower += [
                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
            ]
        self.layers = nn.Sequential(*tower)
        self.header = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)

    def forward(self, inputs):
        """Map a (B, C, H, W) feature map to (B, H * W * num_anchors, 4) deltas."""
        deltas = self.header(self.layers(inputs))
        # Channel-last layout keeps the four values of one anchor adjacent.
        deltas = deltas.permute(0, 2, 3, 1).contiguous()
        return deltas.view(deltas.shape[0], -1, 4)
class Classifier(nn.Module):
    """Convolutional head predicting sigmoid class scores per anchor and location."""

    def __init__(self, in_channels, num_anchors, num_classes, num_layers):
        super().__init__()
        self.num_anchors = num_anchors
        self.num_classes = num_classes
        tower = []
        for _ in range(num_layers):
            tower += [
                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
            ]
        self.layers = nn.Sequential(*tower)
        self.header = nn.Conv2d(in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
        self.act = nn.Sigmoid()

    def forward(self, inputs):
        """Map a (B, C, H, W) feature map to (B, H * W * num_anchors, num_classes) scores."""
        scores = self.act(self.header(self.layers(inputs)))
        scores = scores.permute(0, 2, 3, 1).contiguous()
        batch, rows, cols, _ = scores.shape
        # Split the channel axis into (anchor, class) before flattening the anchors.
        per_anchor = scores.view(batch, rows, cols, self.num_anchors, self.num_classes)
        return per_anchor.contiguous().view(batch, -1, self.num_classes)
class EfficientNet(nn.Module):
    """EfficientNet-B0 backbone with the classification head removed."""

    def __init__(self, ):
        super().__init__()
        backbone = EffNet.from_pretrained('efficientnet-b0')
        # Drop the layers that only serve the classifier; forward() never calls them.
        for head_part in ('_conv_head', '_bn1', '_avg_pooling', '_dropout', '_fc'):
            delattr(backbone, head_part)
        self.model = backbone

    def forward(self, x):
        """Return the outputs of the stride-2 blocks, skipping the first one."""
        net = self.model
        x = net._swish(net._bn0(net._conv_stem(x)))

        block_count = len(net._blocks)
        feature_maps = []
        for idx, block in enumerate(net._blocks):
            # Drop-connect rate grows linearly with the block index.
            rate = net._global_params.drop_connect_rate
            if rate:
                rate *= float(idx) / block_count
            x = block(x, drop_connect_rate=rate)
            # Collect the output of every block whose depthwise conv has stride [2, 2].
            if block._depthwise_conv.stride == [2, 2]:
                feature_maps.append(x)

        return feature_maps[1:]
class EfficientDet(nn.Module):
    """EfficientDet detector: EfficientNet backbone, BiFPN neck, box and class heads.

    ``forward`` returns the focal/regression losses when given an
    ``[images, annotations]`` pair, and ``[scores, labels, boxes]`` for the
    first image of the batch when given an image tensor.
    """

    def __init__(self, num_anchors=9, num_classes=20, compound_coef=0):
        super(EfficientDet, self).__init__()
        self.compound_coef = compound_coef

        self.num_channels = [64, 88, 112, 160, 224, 288, 384, 384][self.compound_coef]

        # Projections from the backbone feature maps to the BiFPN width.
        # NOTE(review): the input channel counts (40, 80, 192) are hard-coded;
        # presumably they match the efficientnet-b0 backbone created below.
        self.conv3 = nn.Conv2d(40, self.num_channels, kernel_size=1, stride=1, padding=0)
        self.conv4 = nn.Conv2d(80, self.num_channels, kernel_size=1, stride=1, padding=0)
        self.conv5 = nn.Conv2d(192, self.num_channels, kernel_size=1, stride=1, padding=0)
        self.conv6 = nn.Conv2d(192, self.num_channels, kernel_size=3, stride=2, padding=1)
        self.conv7 = nn.Sequential(nn.ReLU(),
                                   nn.Conv2d(self.num_channels, self.num_channels, kernel_size=3, stride=2, padding=1))

        self.bifpn = nn.Sequential(*[BiFPN(self.num_channels) for _ in range(min(2 + self.compound_coef, 8))])

        self.num_classes = num_classes
        self.regressor = Regressor(in_channels=self.num_channels, num_anchors=num_anchors,
                                   num_layers=3 + self.compound_coef // 3)
        self.classifier = Classifier(in_channels=self.num_channels, num_anchors=num_anchors, num_classes=num_classes,
                                     num_layers=3 + self.compound_coef // 3)

        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = FocalLoss()

        # Initialise every layer created so far. The backbone is attached only
        # after this loop, so its pretrained weights are left untouched.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # Bias the classifier so that every initial score is about `prior`.
        prior = 0.01
        self.classifier.header.weight.data.fill_(0)
        self.classifier.header.bias.data.fill_(-math.log((1.0 - prior) / prior))

        self.regressor.header.weight.data.fill_(0)
        self.regressor.header.bias.data.fill_(0)

        self.backbone_net = EfficientNet()

    def freeze_bn(self):
        """Put every BatchNorm2d layer into eval mode."""
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def forward(self, inputs):
        """Run the detector.

        Args:
            inputs: either a list/tuple ``(img_batch, annotations)`` for
                training, or an image batch tensor for inference.

        Returns:
            Training: the ``(classification_loss, regression_loss)`` pair
            produced by the focal loss.
            Inference: ``[scores, labels, boxes]`` for image 0 of the batch
            after thresholding at 0.05 and NMS at IoU 0.5; empty tensors when
            no anchor passes the threshold.
        """
        # Only a list/tuple pair means training. Testing len() alone misreads an
        # inference tensor with batch size 2 as an (images, annotations) pair.
        is_training = isinstance(inputs, (list, tuple)) and len(inputs) == 2
        if is_training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs

        c3, c4, c5 = self.backbone_net(img_batch)
        p3 = self.conv3(c3)
        p4 = self.conv4(c4)
        p5 = self.conv5(c5)
        p6 = self.conv6(c5)
        p7 = self.conv7(p6)

        features = [p3, p4, p5, p6, p7]
        features = self.bifpn(features)

        regression = torch.cat([self.regressor(feature) for feature in features], dim=1)
        classification = torch.cat([self.classifier(feature) for feature in features], dim=1)
        anchors = self.anchors(img_batch)

        if is_training:
            return self.focalLoss(classification, regression, anchors, annotations)

        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)

        scores = torch.max(classification, dim=2, keepdim=True)[0]

        # Only the first image of the batch is post-processed.
        scores_over_thresh = (scores > 0.05)[0, :, 0]

        if scores_over_thresh.sum() == 0:
            # Empty results live on the same device as the regular outputs.
            device = classification.device
            return [torch.zeros(0, device=device), torch.zeros(0, device=device),
                    torch.zeros(0, 4, device=device)]

        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]

        anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5)

        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)

        return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
if __name__ == '__main__':
    # Smoke check: build the 80-class model and print its trainable parameter
    # count. The unused tensorboardX import was dropped so the check does not
    # depend on that package.
    def count_parameters(model):
        """Return the number of elements in all parameters that require gradients."""
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    model = EfficientDet(num_classes=80)
    print(count_parameters(model))
import torch
import torch.nn as nn
import numpy as np
class BBoxTransform(nn.Module):
    """Decode predicted deltas into ``[x1, y1, x2, y2]`` boxes relative to anchors."""

    def __init__(self, mean=None, std=None):
        """Store the delta mean/std.

        Args:
            mean: optional 4-element tensor; defaults to zeros.
            std: optional 4-element tensor; defaults to ``[0.1, 0.1, 0.2, 0.2]``.
        """
        super(BBoxTransform, self).__init__()
        if mean is None:
            self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32))
        else:
            self.mean = mean
        if std is None:
            self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32))
        else:
            self.std = std
        if torch.cuda.is_available():
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

    def forward(self, boxes, deltas):
        """Apply ``deltas`` (B, A, 4) to anchor ``boxes`` (1 or B, A, 4).

        Returns:
            Tensor of shape (B, A, 4) holding the decoded corner boxes.
        """
        # mean/std are plain attributes, so module.to()/cuda() never moves them;
        # align them with the deltas here to avoid a CPU/CUDA mismatch.
        mean = self.mean.to(deltas.device)
        std = self.std.to(deltas.device)

        widths = boxes[:, :, 2] - boxes[:, :, 0]
        heights = boxes[:, :, 3] - boxes[:, :, 1]
        ctr_x = boxes[:, :, 0] + 0.5 * widths
        ctr_y = boxes[:, :, 1] + 0.5 * heights

        dx = deltas[:, :, 0] * std[0] + mean[0]
        dy = deltas[:, :, 1] * std[1] + mean[1]
        dw = deltas[:, :, 2] * std[2] + mean[2]
        dh = deltas[:, :, 3] * std[3] + mean[3]

        # Centre offsets are relative to the anchor size; sizes are log-scaled.
        pred_ctr_x = ctr_x + dx * widths
        pred_ctr_y = ctr_y + dy * heights
        pred_w = torch.exp(dw) * widths
        pred_h = torch.exp(dh) * heights

        pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
        pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
        pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
        pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h

        pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)
        return pred_boxes
class ClipBoxes(nn.Module):
    """Clamp ``[x1, y1, x2, y2]`` boxes to the extent of an image batch."""

    def __init__(self):
        super().__init__()

    def forward(self, boxes, img):
        """Clip ``boxes`` (B, A, 4) in place to the size of ``img`` (B, C, H, W) and return them.

        Only the top-left corner is bounded below (by 0) and only the
        bottom-right corner is bounded above (by width/height).
        """
        _, _, img_height, img_width = img.shape

        for column in (0, 1):
            boxes[:, :, column] = boxes[:, :, column].clamp(min=0)
        for column, upper in ((2, img_width), (3, img_height)):
            boxes[:, :, column] = boxes[:, :, column].clamp(max=upper)

        return boxes
class Anchors(nn.Module):
    """Generate the anchor boxes of all pyramid levels for an image batch."""

    def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None):
        """Configure the anchor grid.

        Every argument falls back to its default when ``None``. Explicit values
        are now stored; previously they were dropped, which left the attribute
        unset and raised AttributeError later.

        Args:
            pyramid_levels: levels to cover; default ``[3, 4, 5, 6, 7]``.
            strides: per-level stride; default ``2 ** level``.
            sizes: per-level base size; default ``2 ** (level + 2)``.
            ratios: aspect ratios; default ``[0.5, 1, 2]``.
            scales: scale multipliers; default ``[2**0, 2**(1/3), 2**(2/3)]``.
        """
        super(Anchors, self).__init__()

        self.pyramid_levels = [3, 4, 5, 6, 7] if pyramid_levels is None else pyramid_levels
        self.strides = [2 ** x for x in self.pyramid_levels] if strides is None else strides
        self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] if sizes is None else sizes
        self.ratios = np.array([0.5, 1, 2]) if ratios is None else ratios
        self.scales = (np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
                       if scales is None else scales)

    def forward(self, image):
        """Return a float32 tensor of shape (1, total_anchors, 4) for ``image``.

        Only the spatial size (``image.shape[2:]``) is used. The anchors are
        placed on the image's device when it has one.
        """
        image_shape = np.array(image.shape[2:])
        # Feature-map size per level: ceil(image_size / 2**level).
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        all_anchors = np.zeros((0, 4)).astype(np.float32)
        for idx, p in enumerate(self.pyramid_levels):
            anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
            shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
            all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

        all_anchors = np.expand_dims(all_anchors, axis=0)
        anchors = torch.from_numpy(all_anchors.astype(np.float32))

        # Follow the input's device; fall back to the old CUDA-if-available rule
        # only for inputs that carry no device (e.g. numpy arrays).
        device = getattr(image, 'device', None)
        if device is not None:
            anchors = anchors.to(device)
        elif torch.cuda.is_available():
            anchors = anchors.cuda()
        return anchors
def generate_anchors(base_size=16, ratios=None, scales=None):
    """Build the zero-centred reference anchors for one pyramid level.

    One anchor is produced per (ratio, scale) pair, ordered ratio-major. Each
    keeps the area ``(base_size * scale) ** 2`` while its height/width ratio
    equals ``ratio``.

    Returns:
        Array of shape ``(len(ratios) * len(scales), 4)`` with rows
        ``[x1, y1, x2, y2]`` centred on the origin.
    """
    if ratios is None:
        ratios = np.array([0.5, 1, 2])
    if scales is None:
        scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    # Per-anchor ratio and scale: all scales for the first ratio, then the next ratio.
    ratio_of = np.repeat(ratios, len(scales))
    scale_of = np.tile(scales, len(ratios))

    side = np.asarray(base_size * scale_of, dtype=np.float64)
    area = side * side
    width = np.sqrt(area / ratio_of)
    height = width * ratio_of

    half_w = width * 0.5
    half_h = height * 0.5
    boxes = np.zeros((len(ratios) * len(scales), 4))
    boxes[:, 0] = 0.0 - half_w
    boxes[:, 1] = 0.0 - half_h
    boxes[:, 2] = width - half_w
    boxes[:, 3] = height - half_h
    return boxes
def compute_shape(image_shape, pyramid_levels):
    """Return the feature-map size at every pyramid level.

    For each level the first two entries of ``image_shape`` are divided by
    ``2 ** level``, rounding up.

    Returns:
        List with one 2-element integer array per level.
    """
    spatial = np.array(image_shape[:2])
    shapes = []
    for level in pyramid_levels:
        stride = 2 ** level
        shapes.append((spatial + stride - 1) // stride)  # integer ceil division
    return shapes
def shift(shape, stride, anchors):
    """Replicate reference anchors at the centre of every feature-map cell.

    Args:
        shape: ``(rows, cols)`` of the feature map.
        stride: distance in pixels between neighbouring cell centres.
        anchors: ``(A, 4)`` reference anchors.

    Returns:
        Array of shape ``(rows * cols * A, 4)``, ordered cell by cell
        (row-major), with all anchors of one cell adjacent.
    """
    centers_x = (np.arange(0, shape[1]) + 0.5) * stride
    centers_y = (np.arange(0, shape[0]) + 0.5) * stride
    grid_x, grid_y = np.meshgrid(centers_x, centers_y)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()

    # One [x, y, x, y] offset per cell so both corners move together.
    offsets = np.stack((flat_x, flat_y, flat_x, flat_y), axis=1)

    num_anchors = anchors.shape[0]
    num_cells = offsets.shape[0]
    # (cells, 1, 4) + (1, anchors, 4) broadcasts to (cells, anchors, 4).
    placed = offsets.reshape((num_cells, 1, 4)) + anchors.reshape((1, num_anchors, 4))
    return placed.reshape((num_cells * num_anchors, 4))
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论