#!python
import argparse
import torch
from src.config import COCO_CLASSES, colors
import cv2, datetime
import numpy as np

def get_args():
    """Parse the command-line options of the person-detection script.

    Returns:
        argparse.Namespace: with the attributes ``image_size``,
        ``cls_threshold``, ``nms_threshold``, ``pretrained_model``,
        ``input`` and ``output``.
    """
    # The text is a description, not a program name: passing it positionally
    # would bind it to ``prog`` and garble the usage line.
    parser = argparse.ArgumentParser(
        description="EfficientDet: Scalable and Efficient Object Detection implementation by Signatrix GmbH")
    parser.add_argument("--image_size", type=int, default=512, help="The common width and height for all images")
    parser.add_argument("--cls_threshold", type=float, default=0.5,
                        help="Minimum score for a detection to be kept")
    parser.add_argument("--nms_threshold", type=float, default=0.5,
                        help="Non-maximum-suppression threshold")
    parser.add_argument("-c", "--pretrained_model", type=str, default="edet_model.pth",
                        help="Path of the serialized model to load")
    # nargs="?" makes the positional optional; without it argparse ignores
    # the default and always demands the argument.
    parser.add_argument("input", type=str, nargs="?", default="input.mp4",
                        help="Video source handed to cv2.VideoCapture")
    parser.add_argument("-o", "--output", type=str, default="detect_person.jpg",
                        help="Name of the image written when a person is found")
    return parser.parse_args()

def _preprocess_frame(frame, image_size):
    """Turn a BGR frame into a normalised, zero-padded square batch tensor.

    Args:
        frame: image array as returned by ``cv2.VideoCapture.read``.
        image_size: side length of the square network input.

    Returns:
        tuple: ``(batch, scale)`` where ``batch`` is a float32 tensor of shape
        ``(1, 3, image_size, image_size)`` and ``scale`` is the resize factor
        that was applied to the frame's longer side.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    height, width = rgb.shape[:2]

    # Per-channel offset and divisor (presumably the ImageNet statistics the
    # model was trained with -- confirm against the training pipeline).
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    rgb = (rgb.astype(np.float32) / 255 - mean) / std

    # Scale the longer side to image_size, keeping the aspect ratio.
    if height > width:
        scale = image_size / height
        resized_height = image_size
        resized_width = int(width * scale)
    else:
        scale = image_size / width
        resized_height = int(height * scale)
        resized_width = image_size
    rgb = cv2.resize(rgb, (resized_width, resized_height))

    # Pad bottom/right with zeros so the input is square.
    padded = np.zeros((image_size, image_size, 3), dtype=np.float32)
    padded[0:resized_height, 0:resized_width] = rgb

    # HWC -> CHW, then add the batch dimension.
    chw = np.ascontiguousarray(np.transpose(padded, (2, 0, 1)))
    return torch.from_numpy(chw[None, :, :, :]), scale


def _draw_detection(canvas, box, caption, color=(255, 0, 0)):
    """Draw a box outline and a filled caption tag onto ``canvas`` in place.

    Args:
        canvas: image array to draw on.
        box: ``(xmin, ymin, xmax, ymax)`` in canvas pixel coordinates.
        caption: text rendered in the tag at the box's top-left corner.
        color: colour tuple for the outline and the tag background.
    """
    # OpenCV drawing functions need plain Python ints for coordinates.
    xmin, ymin, xmax, ymax = (int(v) for v in box)
    font = cv2.FONT_HERSHEY_PLAIN
    cv2.rectangle(canvas, (xmin, ymin), (xmax, ymax), color, 1)
    # Size the tag from the exact caption that is drawn.
    text_w, text_h = cv2.getTextSize(caption, font, 1, 1)[0]
    cv2.rectangle(canvas, (xmin, ymin), (xmin + text_w + 1, ymin + text_h + 1), color, -1)
    cv2.putText(canvas, caption, (xmin, ymin + text_h + 1), font, 1, (255, 255, 255), 1)


def test(opt):
    """Scan a video and save the first sampled frame that contains a person.

    Every 54th frame is run through the model. On the first sampled frame
    with a 'person' detection scoring at least ``opt.cls_threshold``, the
    annotated frame is written to ``<opt.output>_<timestamp>.jpg``, a summary
    line is printed and processing stops.

    Args:
        opt: namespace providing ``pretrained_model``, ``input``, ``output``,
            ``image_size`` and ``cls_threshold``.
    """
    frame_stride = 18 * 3  # only every frame_stride-th frame is analysed
    target_class = 'person'

    # SECURITY: torch.load unpickles arbitrary objects -- only load model
    # files from a trusted source.
    # NOTE(review): ``.module`` presumes the checkpoint holds a wrapped model
    # (e.g. DataParallel) -- confirm against the training script.
    model = torch.load(opt.pretrained_model, map_location='cpu').module
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model.cuda()

    bDetected = False
    strDetMsg = ''
    # Seconds since the Unix epoch. The previous local-now minus UTC-epoch
    # subtraction was off by the UTC offset and used the deprecated
    # datetime.utcfromtimestamp.
    ts = int(datetime.datetime.now().timestamp())
    fname = opt.output + "_" + str(ts) + ".jpg"
    frameCnt = 0
    tsStart = datetime.datetime.now()

    cap = cv2.VideoCapture(opt.input)
    try:
        while cap.isOpened():
            flag, frame = cap.read()
            if not flag:
                break
            frameCnt += 1
            if frameCnt % frame_stride != 0:
                continue

            # Copy only frames that are analysed; annotations go on the copy.
            output_image = np.copy(frame)
            batch, scale = _preprocess_frame(frame, opt.image_size)
            if use_cuda:
                batch = batch.cuda()
            with torch.no_grad():
                scores, labels, boxes = model(batch)
                # Map boxes from network-input scale back to frame pixels.
                boxes = boxes / scale
            if boxes.shape[0] == 0:
                continue

            detections = zip(scores.tolist(), labels.tolist(), boxes.tolist())
            for pred_prob, pred_label, box in detections:
                if pred_prob < opt.cls_threshold:
                    continue
                pred_label = int(pred_label)
                if COCO_CLASSES[pred_label] != target_class:
                    continue
                xmin, ymin, xmax, ymax = box
                caption = COCO_CLASSES[pred_label] + ': %.3f' % pred_prob
                _draw_detection(output_image, box, caption)
                if not bDetected:
                    # Only the first qualifying detection is reported.
                    strDetMsg = "edet found human {:.3f} x: {}, y: {}, w: {}, h: {}; written image: {}".format(
                        pred_prob, int(xmin), int(ymin), int(xmax - xmin), int(ymax - ymin), fname)
                    bDetected = True

            # NOTE(review): this point is reached only for sampled frames that
            # produced at least one box, so the report fires only when the
            # count is a multiple of both the stride and 1000.
            if frameCnt % 1000 == 0:
                tsEnd = datetime.datetime.now()
                fps = frameCnt / (tsEnd - tsStart).total_seconds()
                print("fps: ", fps)
                frameCnt = 0
                tsStart = datetime.datetime.now()
            if bDetected:
                cv2.imwrite(fname, output_image)
                print(strDetMsg)
                break
    finally:
        # Release the capture even if decoding or inference raises.
        cap.release()

if __name__ == "__main__":
    import os

    opt = get_args()
    # Drop only a genuine file extension; test() appends "_<timestamp>.jpg".
    # Slicing at rfind(".") cut off the last character of names without a dot
    # and truncated at dots in directory parts such as "./out".
    opt.output = os.path.splitext(opt.output)[0]
    test(opt)