yolo

eb73e47d · blu · 916cc313 · eb73e47d · eb73e47d · 916cc313
--- a/opencv-yolo/Makefile
+++ b/opencv-yolo/Makefile
@@ -3,7 +3,7 @@ CFLAGS = -g -Wall -std=c++11
 SRCS = main.cpp
 PROG = main

-OPENCV = `pkg-config opencv --cflags --libs`
+OPENCV = `pkg-config opencv4 --cflags --libs`
 LIBS = $(OPENCV)

 $(PROG):$(SRCS)

--- a/opencv-yolo/coco.names.txt
+++ b/opencv-yolo/coco.names.txt
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/opencv-yolo/main
+++ b/opencv-yolo/main
--- a/opencv-yolo/main.cpp
+++ b/opencv-yolo/main.cpp
@@ -33,7 +33,7 @@ int inpHeight = 416; // Height of network's input image
 vector<string> classes;

 // Remove the bounding boxes with low confidence using non-maxima suppression
-void postprocess(Mat& frame, const vector<Mat>& out);
+int postprocess(Mat& frame, const vector<Mat>& out);

 // Draw the predicted bounding box
 void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
@@ -80,8 +80,8 @@ int main(int argc, char** argv)
            ifstream ifile(str);
            if (!ifile) throw("error");
            cap.open(str);
-            //str.replace(str.end()-4, str.end(), "_yolo_out_cpp.jpg");
-            //outputFile = str;
+            str.replace(str.end()-4, str.end(), "_yolo_out_cpp"); //.jpg
+            outputFile = str;
        }
        else if (parser.has("video"))
        {
@@ -93,11 +93,12 @@ int main(int argc, char** argv)
            if(!cap.open(str)){
                cout << "failed to open video stream: " << str << endl;
            }
-            //str.replace(str.end()-4, str.end(), "_yolo_out_cpp.avi");
-            //outputFile = str;
-        }
+            str.replace(str.end()-4, str.end(), "_yolo_out_cpp.avi");
+        }else{
            // Open the webcam
-        else cap.open(parser.get<int>("device"));
+            cap.open(parser.get<int>("device"));
+        }
+
        cout << "output file: " << outputFile << endl;
        
    }
@@ -117,7 +118,7 @@ int main(int argc, char** argv)

    // Process frames.
    long frameCnt = 0;
-    
+    long detCnt = 0;
    while (waitKey(1) < 0)
    {
        // get frame from the video
@@ -147,7 +148,10 @@ int main(int argc, char** argv)
        net.forward(outs, getOutputsNames(net));
        
        // Remove the bounding boxes with low confidence
-        postprocess(frame, outs);
+        int numDet = postprocess(frame, outs);
+        if(numDet == 0 && parser.has("image")) {
+            continue;
+        }
        
        // Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
        vector<double> layersTimes;
@@ -159,7 +163,11 @@ int main(int argc, char** argv)
        // Write the frame with the detection boxes
        Mat detectedFrame;
        frame.convertTo(detectedFrame, CV_8U);
-        if (parser.has("image")) imwrite(outputFile, detectedFrame);
+        if (parser.has("image")) {
+            string ofname = outputFile + to_string(detCnt) + ".jpg";
+            imwrite(ofname, detectedFrame);
+            detCnt++;
+        }
        else {
            video.write(detectedFrame);
        }    
@@ -173,7 +181,7 @@ int main(int argc, char** argv)
 }

 // Remove the bounding boxes with low confidence using non-maxima suppression
-void postprocess(Mat& frame, const vector<Mat>& outs)
+int postprocess(Mat& frame, const vector<Mat>& outs)
 {
    vector<int> classIds;
    vector<float> confidences;
@@ -219,6 +227,8 @@ void postprocess(Mat& frame, const vector<Mat>& outs)
        drawPred(classIds[idx], confidences[idx], box.x, box.y,
                 box.x + box.width, box.y + box.height, frame);
    }
+
+    return indices.size();
 }

 // Draw the predicted bounding box

--- a/opencv-yolo/yolov3-tiny.cfg
+++ b/opencv-yolo/yolov3-tiny.cfg
+[net]
+# Testing
+batch=1
+subdivisions=1
+# Training
+# batch=64
+# subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=1
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+
+
+[yolo]
+mask = 3,4,5
+anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
+classes=80
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 8
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+[yolo]
+mask = 0,1,2
+anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
+classes=80
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
--- a/opencv-yolo/yolov3-tiny.weights
+++ b/opencv-yolo/yolov3-tiny.weights