ai.detect: region feature

f99bf983 · blu · 260a5a91 · f99bf983 · f99bf983
--- a/opencv-motion-detect/evslicer.cpp
+++ b/opencv-motion-detect/evslicer.cpp
@@ -936,6 +936,7 @@ public:
        }
        spdlog::info("{} boot", selfId);
+        //TODO: single process
        SingletonProcess self(selfName, iid);
        if(!self()){
          spdlog::error("{} already running. ignore this instance", selfId);

--- a/opencv-yolo/web/detect_video.py
+++ b/opencv-yolo/web/detect_video.py
@@ -2,21 +2,28 @@
 import argparse
 import torch
 from src.config import COCO_CLASSES, colors
-import cv2, datetime
+import cv2
+import datetime
 import numpy as np
 def get_args():
    parser = argparse.ArgumentParser(
        "EfficientDet: Scalable and Efficient Object Detection implementation by Signatrix GmbH")
-    parser.add_argument("--image_size", type=int, default=512, help="The common width and height for all images")
+    parser.add_argument("--image_size", type=int, default=512,
+                        help="The common width and height for all images")
    parser.add_argument("--cls_threshold", type=float, default=0.5)
    parser.add_argument("--nms_threshold", type=float, default=0.5)
-    parser.add_argument("-c", "--pretrained_model", type=str, default="edet_model.pth")
+    parser.add_argument("-s", "--scale", type=str, default='0,0,1,1')
+    parser.add_argument("-c", "--pretrained_model",
+                        type=str, default="edet_model.pth")
    parser.add_argument("input", type=str, default="input.mp4")
-    parser.add_argument("-o", "--output", type=str, default="detect_person.jpg")
+    parser.add_argument("-o", "--output", type=str,
+                        default="detect_person.jpg")
    args = parser.parse_args()
    return args
 def test(opt):
    tsEpoch = datetime.datetime.utcfromtimestamp(0)
    model = torch.load(opt.pretrained_model, map_location='cpu').module
@@ -31,19 +38,58 @@ def test(opt):
    fname = opt.output + "_" + str(ts) + ".jpg"
    frameCnt = 0
    tsStart = datetime.datetime.now()
+    # --scale is "x1,y1,x2,y2", each a fraction (0..1) of the frame size.
+    # Any invalid value falls back to the whole frame: 0,0,1,1.
+    try:
+        # Unpack inside the try so a wrong field count is handled as well;
+        # both a bad float() and a bad unpack raise ValueError.
+        x1, y1, x2, y2 = [float(v) for v in opt.scale.split(',')]
+        if x1 < 0 or y1 < 0 or x2 > 1 or y2 > 1 or x1 > x2 or y1 > y2:
+            raise ValueError("region out of range")
+    except ValueError:
+        print("invalid region config: {}".format(opt.scale))
+        x1 = 0
+        y1 = 0
+        x2 = 1
+        y2 = 1
+    hasRegion = False
+    if x1 == 0 and y1 == 0 and x2 == 1 and y2 == 1:
+        pass
+    else:
+        print("region: {}, {}, {}, {}".format(x1, y1, x2, y2))
+        hasRegion = True
    while cap.isOpened():
+        image = None
        flag, image = cap.read()
-        output_image = np.copy(image)
+        height = None
+        width = None
        if flag:
+            height, width = image.shape[:2]
+            output_image = np.copy(image)
+            if hasRegion:
+                ratio = width/(height * 1.0)
+                px1 = int(width * x1)
+                py1 = int(height * y1)
+                px2 = int(width * x2)
+                py2 = int(height * y2)
+                # height = py2 - py1
+                # width = px2 - px1
+                # image = image[py1:py2, px1:px2]
+                image[0:py1,:,:] = 0
+                image[py2:, :, :] = 0
+                image[:, 0:px1, :] = 0
+                image[:,px2:, :] = 0
+            #output_image = np.copy(image)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        else:
            break
        frameCnt += 1
        if frameCnt % (18*3) == 0:
-          pass
+            pass
        else:
-          continue
+            continue
-        height, width = image.shape[:2]
        image = image.astype(np.float32) / 255
        image[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229
        image[:, :, 1] = (image[:, :, 1] - 0.456) / 0.224
@@ -81,21 +127,24 @@ def test(opt):
                continue
            xmin, ymin, xmax, ymax = boxes[box_id, :]
            color = colors[pred_label]
-            color = (255,0,0)
+            color = (255, 0, 0)
            font = cv2.FONT_HERSHEY_PLAIN
            cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), color, 1)
-            text_size = cv2.getTextSize(COCO_CLASSES[pred_label] + ' : %.2f' % pred_prob, font, 1, 1)[0]
+            text_size = cv2.getTextSize(
-            cv2.rectangle(output_image, (xmin, ymin), (xmin + text_size[0] + 1, ymin + text_size[1] + 1), color, -1)
+                COCO_CLASSES[pred_label] + ' : %.2f' % pred_prob, font, 1, 1)[0]
+            cv2.rectangle(output_image, (xmin, ymin), (xmin +
+                                                       text_size[0] + 1, ymin + text_size[1] + 1), color, -1)
            cv2.putText(
                output_image, COCO_CLASSES[pred_label] + ': %.3f' % pred_prob,
                (xmin, ymin + text_size[1] + 1), font, 1,
                (255, 255, 255), 1)
            if not bDetected:
-              elapse = 0
+                elapse = 0
-              if fps:
+                if fps:
-                elapse = int(frameCnt / fps)
+                    elapse = int(frameCnt / fps)
-              strDetMsg = "edet found human {:.3f} x: {}, y: {}, w: {}, h: {}; written image: {}; time: {}".format(pred_prob, int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin), fname, elapse)
+                strDetMsg = "edet found human {:.3f} x: {}, y: {}, w: {}, h: {}; written image: {}; time: {}".format(
-              bDetected = True
+                    pred_prob, int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin), fname, elapse)
+                bDetected = True
        if frameCnt % 1000 == 0:
            tsEnd = datetime.datetime.now()
@@ -109,6 +158,7 @@ def test(opt):
            break
    cap.release()
 if __name__ == "__main__":
    opt = get_args()
    opt.output = opt.output[0:opt.output.rfind(".")]