#!python
"""Scan a video for a person with a pretrained EfficientDet model.

Every ``FRAME_STRIDE``-th frame is run through the detector.  On the first
sampled frame that contains a 'person' detection above the classification
threshold, an annotated copy of the frame is written to disk, a summary line
is printed, and the scan stops.
"""
import argparse
import datetime
import os

import cv2
import numpy as np
import torch

from src.config import COCO_CLASSES, colors  # noqa: F401  (colors kept for importers)

# Only every FRAME_STRIDE-th decoded frame is passed to the detector.
FRAME_STRIDE = 18 * 3
# Decoding throughput is printed every FPS_REPORT_INTERVAL frames read.
FPS_REPORT_INTERVAL = 1000

# Per-channel (R, G, B) normalisation constants applied to the [0, 1] image.
_CHANNEL_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
_CHANNEL_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

_FULL_FRAME = (0.0, 0.0, 1.0, 1.0)
_BOX_COLOR = (255, 0, 0)
_REGION_COLOR = (0, 255, 0)
_TEXT_COLOR = (255, 255, 255)


def get_args():
    """Parse and return the command-line arguments.

    ``input`` is an optional positional argument; when omitted it falls back
    to ``input.mp4``.
    """
    parser = argparse.ArgumentParser(
        "EfficientDet: Scalable and Efficient Object Detection implementation by Signatrix GmbH")
    parser.add_argument("--image_size", type=int, default=512,
                        help="The common width and height for all images")
    parser.add_argument("--cls_threshold", type=float, default=0.5)
    parser.add_argument("--nms_threshold", type=float, default=0.5)
    parser.add_argument("-s", "--scale", type=str, default='0,0,1,1')
    parser.add_argument("-c", "--pretrained_model", type=str, default="edet_model.pth")
    # nargs="?" is what makes the default effective for a positional argument.
    parser.add_argument("input", type=str, nargs="?", default="input.mp4")
    parser.add_argument("-o", "--output", type=str, default="detect_person.jpg")
    return parser.parse_args()


def _parse_region(scale):
    """Parse ``"x1,y1,x2,y2"`` (fractions of the frame) into four floats.

    Returns the full frame ``(0, 0, 1, 1)`` and prints a diagnostic when the
    string is malformed, has the wrong number of fields, or describes a
    rectangle outside ``0 <= x1 <= x2 <= 1`` / ``0 <= y1 <= y2 <= 1``.
    """
    try:
        x1, y1, x2, y2 = (float(part) for part in scale.split(","))
        # Chained comparisons also reject NaN, which would otherwise slip
        # through and blow up later in int().
        if not (0 <= x1 <= x2 <= 1 and 0 <= y1 <= y2 <= 1):
            raise ValueError("region out of range")
    except ValueError:
        print("invalid region config: {}".format(scale))
        return _FULL_FRAME
    return x1, y1, x2, y2


def _region_to_pixels(region, width, height):
    """Convert a fractional region to integer pixel corners (px1, py1, px2, py2)."""
    x1, y1, x2, y2 = region
    return int(width * x1), int(height * y1), int(width * x2), int(height * y2)


def _mask_outside(frame, pixel_region):
    """Zero, in place, every pixel of ``frame`` outside ``pixel_region``."""
    px1, py1, px2, py2 = pixel_region
    frame[0:py1, :, :] = 0
    frame[py2:, :, :] = 0
    frame[:, 0:px1, :] = 0
    frame[:, px2:, :] = 0


def _preprocess(frame_bgr, image_size):
    """Turn a BGR frame into a normalised, padded 1x3xSxS float tensor.

    The frame is converted to RGB, scaled to [0, 1], normalised per channel,
    resized so its longer side equals ``image_size`` and zero-padded at the
    bottom/right to a square.

    Returns:
        (tensor, scale) where ``scale`` is the resize factor, so coordinates
        in the resized image divided by ``scale`` are in frame pixels.
    """
    height, width = frame_bgr.shape[:2]
    image = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255
    image = (image - _CHANNEL_MEAN) / _CHANNEL_STD

    if height > width:
        scale = image_size / height
        resized_height = image_size
        resized_width = int(width * scale)
    else:
        scale = image_size / width
        resized_height = int(height * scale)
        resized_width = image_size

    image = cv2.resize(image, (resized_width, resized_height))
    padded = np.zeros((image_size, image_size, 3), dtype=np.float32)
    padded[0:resized_height, 0:resized_width] = image
    chw = np.ascontiguousarray(np.transpose(padded, (2, 0, 1)))
    return torch.from_numpy(chw).unsqueeze(0), scale


def _draw_person(canvas, corners, pred_label, pred_prob):
    """Draw one detection box and its 'class: prob' label on ``canvas``."""
    xmin, ymin, xmax, ymax = corners
    font = cv2.FONT_HERSHEY_PLAIN
    caption = COCO_CLASSES[pred_label] + ': %.3f' % pred_prob
    cv2.rectangle(canvas, (xmin, ymin), (xmax, ymax), _BOX_COLOR, 1)
    # Size the filled background from the exact string that is drawn.
    text_size = cv2.getTextSize(caption, font, 1, 1)[0]
    cv2.rectangle(canvas, (xmin, ymin),
                  (xmin + text_size[0] + 1, ymin + text_size[1] + 1),
                  _BOX_COLOR, -1)
    cv2.putText(canvas, caption, (xmin, ymin + text_size[1] + 1),
                font, 1, _TEXT_COLOR, 1)


def test(opt):
    """Scan ``opt.input`` and save the first sampled frame showing a person.

    Args:
        opt: namespace with ``pretrained_model``, ``input``, ``output``
            (file stem, no extension), ``scale`` (``"x1,y1,x2,y2"`` region of
            interest as frame fractions), ``image_size`` and
            ``cls_threshold``.

    Side effects:
        Prints the region (if restricted), periodic decoding throughput and,
        on detection, a summary line; writes
        ``<opt.output>_<unix-seconds>.jpg`` on detection.
    """
    # SECURITY: torch.load unpickles arbitrary objects; only load checkpoints
    # from a trusted source.
    # NOTE(review): the checkpoint is expected to be a wrapper exposing
    # `.module` (e.g. a pickled DataParallel) -- confirm against training code.
    model = torch.load(opt.pretrained_model, map_location='cpu').module
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model.cuda()

    region = _parse_region(opt.scale)
    has_region = region != _FULL_FRAME
    if has_region:
        print("region: {}, {}, {}, {}".format(*region))

    # Real Unix time; subtracting a naive UTC epoch from local now() would
    # skew the value by the local UTC offset.
    timestamp = int(datetime.datetime.now().timestamp())
    fname = opt.output + "_" + str(timestamp) + ".jpg"

    cap = cv2.VideoCapture(opt.input)
    try:
        if not cap.isOpened():
            print("cannot open input: {}".format(opt.input))
            return

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        frame_cnt = 0
        # Throughput is measured over a window that is tracked separately so
        # neither the video FPS nor the absolute frame index is clobbered.
        window_start_frame = 0
        window_start_time = datetime.datetime.now()

        while cap.isOpened():
            ok, frame = cap.read()
            if not ok:
                break
            frame_cnt += 1

            if frame_cnt % FPS_REPORT_INTERVAL == 0:
                now = datetime.datetime.now()
                seconds = (now - window_start_time).total_seconds()
                if seconds > 0:
                    print("fps: ", (frame_cnt - window_start_frame) / seconds)
                window_start_frame = frame_cnt
                window_start_time = now

            # Skip before doing any per-frame copying or colour conversion.
            if frame_cnt % FRAME_STRIDE != 0:
                continue

            height, width = frame.shape[:2]
            output_image = np.copy(frame)  # unmasked copy used for annotation
            pixel_region = None
            if has_region:
                pixel_region = _region_to_pixels(region, width, height)
                _mask_outside(frame, pixel_region)

            tensor, scale = _preprocess(frame, opt.image_size)
            if use_cuda:
                tensor = tensor.cuda()
            with torch.no_grad():
                scores, labels, boxes = model(tensor)
            # Boxes come back in resized-image pixels; map them to the frame.
            boxes /= scale

            det_msg = None
            for score, label, box in zip(scores, labels, boxes):
                pred_prob = float(score)
                if pred_prob < opt.cls_threshold:
                    continue
                pred_label = int(label)
                if COCO_CLASSES[pred_label] != 'person':
                    continue

                # OpenCV drawing functions need plain Python ints.
                xmin, ymin, xmax, ymax = (int(v) for v in box.tolist())
                _draw_person(output_image, (xmin, ymin, xmax, ymax),
                             pred_label, pred_prob)

                if det_msg is None:
                    if pixel_region is not None:
                        px1, py1, px2, py2 = pixel_region
                        cv2.rectangle(output_image, (px1, py1), (px2, py2),
                                      _REGION_COLOR, 1)
                    elapse = int(frame_cnt / video_fps) if video_fps else 0
                    det_msg = ("edet found human {:.3f} x: {}, y: {}, w: {}, h: {}; "
                               "written image: {}; time: {}").format(
                        pred_prob, xmin, ymin, xmax - xmin, ymax - ymin,
                        fname, elapse)

            if det_msg is not None:
                if not cv2.imwrite(fname, output_image):
                    print("failed to write image: {}".format(fname))
                print(det_msg)
                break
    finally:
        cap.release()


if __name__ == "__main__":
    opt = get_args()
    # splitext leaves extension-less names intact (rfind(".") == -1 would
    # silently drop the last character).
    opt.output = os.path.splitext(opt.output)[0]
    test(opt)