提交 b5bb0ed3 authored 作者: blu's avatar blu

object detection

上级 8fab9d1d
......@@ -13,5 +13,8 @@ all: $(PROG) $(PROG2)
$(PROG):$(SRCS)
$(CC) $(CFLAGS) -o $(PROG) $(SRCS) $(LIBS)
$(PROG2):$(SRCS2)
$(PROG2):$(SRCS2) yolo.hpp
$(CC) $(CFLAGS) -o $(PROG2) $(SRCS2) $(LIBS) -I../opencv-motion-detect/inc -I../opencv-motion-detect/vendor/include
clean:
rm -fr main yolo
\ No newline at end of file
#include <fstream>
#include "yolo.hpp"
#include "clipp.h"
#include <sstream>
#include <iostream>
#include <tuple>
#include "fs.h"
#include "spdlog/spdlog.h"
#ifdef _MY_HEADERS_
#include <opencv2/core/types_c.h>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#else
#include <opencv2/core/types_c.h>
#include <opencv2/opencv.hpp>
#endif
using namespace cv;
using namespace dnn;
using namespace clipp;
using namespace std;
class YoloDectect {
private:
// Initialize the parameters
const string selfId = "YoloDetector";
float confThreshold = 0.5; // Confidence threshold
float nmsThreshold = 0.4; // Non-maximum suppression threshold
int inpWidth = 416; // Width of network's input image
int inpHeight = 416; // Height of network's input image
vector<string> classes;
Net net;
Mat blob;
VideoCapture cap;
VideoWriter video;
bool bOutputIsImg = false;
string outFileBase;
bool cmdStop = false;
// Get the names of the output layers
vector<String> getOutputsNames(const Net& net)
{
static vector<String> names;
if (names.empty()) {
//Get the indices of the output layers, i.e. the layers with unconnected outputs
vector<int> outLayers = net.getUnconnectedOutLayers();
//get the names of all the layers in the network
vector<String> layersNames = net.getLayerNames();
// Get the names of the output layers in names
names.resize(outLayers.size());
for (size_t i = 0; i < outLayers.size(); ++i)
names[i] = layersNames[outLayers[i] - 1];
}
return names;
}
// draw the predicted bounding box
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
// draw a rectangle displaying the bounding box
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(255, 178, 50), 3);
//get the label for the class name and its confidence
string label = format("%.2f", conf);
if (!classes.empty()) {
CV_Assert(classId < (int)classes.size());
label = classes[classId] + ":" + label;
}
// display the label at the top of the bounding box
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(frame, Point(left, top - round(1.5*labelSize.height)), Point(left + round(1.5*labelSize.width), top + baseLine), Scalar(255, 255, 255), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0,0,0),1);
}
// post process
vector<tuple<string, double, Rect>> postprocess(Mat& frame, const vector<Mat>& outs)
{
vector<int> classIds;
vector<float> confidences;
vector<Rect> boxes;
for (size_t i = 0; i < outs.size(); ++i) {
// Scan through all the bounding boxes output from the network and keep only the
// ones with high confidence scores. Assign the box's class label as the class
// with the highest score for the box.
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols) {
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
// Get the value and location of the maximum score
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > confThreshold) {
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
// Perform non maximum suppression to eliminate redundant overlapping boxes with lower confidences
vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
vector<tuple<string, double, Rect>> ret;
for (size_t i = 0; i < indices.size(); ++i) {
int idx = indices[i];
Rect box = boxes[idx];
ret.push_back(tuple<string, double, Rect>(classes[classIds[idx]], confidences[idx], box));
drawPred(classIds[idx], confidences[idx], box.x, box.y, box.x + box.width, box.y + box.height, frame);
}
return ret;
}
//
protected:
//
public:
typedef int (*callback)(vector<tuple<string, double, Rect>>&, Mat);
YoloDectect(string path = "")
{
if(path.empty()) {
path = ".";
}
// Load names of classes
string classesFile = path + "/coco.names";
// Give the configuration and weight files for the model
String modCfg = path + "/yolov3-tiny.cfg";
String modWeights = path + "/yolov3-tiny.weights";
if(!fs::exists(classesFile) || !fs::exists(modCfg) || !fs::exists(modWeights)) {
spdlog::error("{} failed to load configration files", selfId);
exit(1);
}
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line)) {
classes.push_back(line);
}
// Load the network
net = readNetFromDarknet(modCfg, modWeights);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
spdlog::info("{} inited", selfId);
}
vector<tuple<string, double, Rect>> process(Mat &inFrame, Mat &outFrame)
{
if(inFrame.empty()) {
return vector<tuple<string, double, Rect>>();
}
// Create a 4D blob from a frame.
blobFromImage(inFrame, blob, 1/255.0, cvSize(inpWidth, inpHeight), Scalar(0,0,0), true, false);
//Sets the input to the network
net.setInput(blob);
// Runs the forward pass to get output of the output layers
vector<Mat> outs;
net.forward(outs, getOutputsNames(net));
// Remove the bounding boxes with low confidence
auto ret = postprocess(inFrame, outs);
// The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
spdlog::info("{} infer time: {} ms", selfId, t);
inFrame.convertTo(outFrame, CV_8U);
return ret;
}
int process(string inVideoUri, string outFile = "processed.jpg", callback cb = nullptr)
{
if(inVideoUri.empty()) {
inVideoUri = "0";
}
if(!cap.open(inVideoUri)) {
spdlog::error("{} failed to open input video {}", selfId, inVideoUri);
return -1;
}
ghc::filesystem::path p(outFile);
auto dir = p.parent_path();
if((outFile.substr(outFile.find_last_of(".") + 1) == "jpg")) {
bOutputIsImg = true;
outFileBase = string(dir / p.stem());
spdlog::info("{} outFileBase {}", selfId, outFileBase);
}
else {
bOutputIsImg = false;
if(!video.open(outFile, VideoWriter::fourcc('M','J','P','G'), 28, Size(cap.get(CAP_PROP_FRAME_WIDTH), cap.get(CAP_PROP_FRAME_HEIGHT)))) {
spdlog::error("{} failed to open output video {}", selfId, outFile);
return -1;
}
}
spdlog::info("{} try to process video {} to {}", selfId, inVideoUri, outFile);
long frameCnt = 0;
long detCnt = 0, skipCnt = 0;
Mat frame, outFrame;
while (waitKey(1) < 0) {
// get frame from the video
if(cmdStop) {
break;
}
if(!cap.read(frame)) {
spdlog::error("{} failed to read frame from {}", selfId, inVideoUri);
break;
}
frameCnt++;
// Stop the program if reached end of video
if (frame.empty()) {
continue;
}
vector<tuple<string, double, Rect>> ret = process(frame, outFrame);
if(cb == nullptr) {
if(ret.size() == 0 && bOutputIsImg) {
// no detection
if(skipCnt % 100 == 0) {
spdlog::info("{} no valid object detected skipped frame count {}", selfId, skipCnt);
}
skipCnt++;
continue;
}
if (bOutputIsImg) {
string ofname = outFileBase + to_string(detCnt) + ".jpg";
imwrite(ofname, outFrame);
detCnt++;
}
else {
video.write(outFrame);
}
}else{
cb(ret, outFrame);
}
}
cap.release();
if(!bOutputIsImg) video.release();
return 0;
}
};
int main(int argc, char** argv)
{
YoloDectect det;
det.process("rtsp://admin:ZQEAAI@192.168.0.101:554/h264/ch1/main/av_stream", "a.avi");
return 0;
/**
 * Command-line entry point: parses the options, builds a YoloDectect from
 * them and runs it on the given input.
 *
 * Returns 0 on success or when help was requested, 1 when the arguments
 * cannot be parsed or processing reports an error.
 */
int main(int argc, char *argv[]){
    // NOTE(review): bHumanOnly already defaults to true, so passing -human
    // changes nothing — confirm whether the default was meant to be false.
    bool bHumanOnly = true;
    float fConfident = 0.1f;
    bool bVerbose = false;
    bool help = false;
    bool bCont = false;
    string sInput, sOutput = "detect.jpg";
    string modelPath = ".";

    // Options that carry a value are joined to a value(...) with `&` so the
    // argument following the flag is consumed and stored (clipp README idiom);
    // .set(target) on the option alone never reads that argument.
    auto cli = (
        value("input path", sInput),
        (option("-cl") & value("level", fConfident)) % "confidence level of detection, default: 0.1",
        option("-vv", "--debug").set(bVerbose).doc("verbose prints"),
        option("-human", "--human-only").set(bHumanOnly).doc("detect only human object"),
        (option("-c", "--config-path") & value("path", modelPath)) % "model and configuration path",
        option("-h", "--help").set(help).doc("print this help info"),
        (option("-o", "--output") & value("file", sOutput)) % "output, eg: a.jpg; b.avi",
        option("-r", "--continue").set(bCont).doc("continue detection, default: false")
    );

    if(!parse(argc, argv, cli) || help) {
        stringstream s;
        s << make_man_page(cli, argv[0]);
        // log through "{}" so braces in the man page are not parsed as format fields
        spdlog::info("{}", s.str());
        // asking for help is a success; unparsable arguments are not
        return help ? 0 : 1;
    }

    if(bVerbose) {
        spdlog::set_level(spdlog::level::debug);
    }

    YoloDectect detector(modelPath, bHumanOnly, fConfident, bCont);
    // propagate a processing failure to the shell
    return detector.process(sInput, sOutput) == 0 ? 0 : 1;
}
\ No newline at end of file
......@@ -28,8 +28,8 @@ public:
private:
// Initialize the parameters
const string selfId = "YoloDetector";
float confThreshold = 0.5; // Confidence threshold
float nmsThreshold = 0.4; // Non-maximum suppression threshold
float confThreshold = 0.1; // Confidence threshold
float nmsThreshold = 0.2; // Non-maximum suppression threshold
int inpWidth = 416; // Width of network's input image
int inpHeight = 416; // Height of network's input image
vector<string> classes;
......@@ -38,10 +38,14 @@ private:
VideoCapture cap;
VideoWriter video;
bool bOutputIsImg = false;
bool bInputIsImage = true;
string outFileBase;
bool cmdStop = false;
unsigned int wrapNum = 0;
unsigned int numLogSkip = 0;
bool bHumanOnly = false;
bool bContinue = true;
int cameNo = -1;
// Get the names of the output layers
vector<String> getOutputsNames(const Net& net)
......@@ -109,6 +113,12 @@ private:
int left = centerX - width / 2;
int top = centerY - height / 2;
if(bHumanOnly){
if(classes[classIdPoint.x] != "person"){
continue;
}
}
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
......@@ -137,12 +147,17 @@ protected:
//
public:
typedef int (*callback)(vector<tuple<string, double, Rect>>&, Mat);
YoloDectect(string path = ".", unsigned int _wrapNum = 10, unsigned int _numLogSkip = 380)
YoloDectect(string path = ".", bool _humanOnly = false, float confThresh = 0.1, bool _bContinue = true, unsigned int _wrapNum = 10, unsigned int _numLogSkip = 380)
{
if(path.empty()) {
path = ".";
}
bHumanOnly = _humanOnly;
bContinue = _bContinue;
confThreshold = confThresh;
wrapNum = _wrapNum;
numLogSkip = _numLogSkip;
......@@ -167,7 +182,7 @@ public:
net = readNetFromDarknet(modCfg, modWeights);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
spdlog::info("{} inited", selfId);
spdlog::debug("{} inited", selfId);
}
vector<tuple<string, double, Rect>> process(Mat &inFrame, Mat* pOutFrame, bool bModify = false)
......@@ -194,7 +209,7 @@ public:
if(numLogSkip == 0 || numFrameProcessed % numLogSkip == 0) {
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
spdlog::info("{} infer time: {} ms", selfId, t);
spdlog::debug("{} infer time: {} ms", selfId, t);
}
if(pOutFrame != nullptr){
inFrame.convertTo(*pOutFrame, CV_8U);
......@@ -204,15 +219,27 @@ public:
return ret;
}
int process(string inVideoUri, string outFile = "processed.jpg", bool bHumanExit = false, callback cb = nullptr)
int process(string inVideoUri, string outFile = "processed.jpg", callback cb = nullptr)
{
if(inVideoUri.empty()) {
inVideoUri = "0";
}
if(!cap.open(inVideoUri)) {
try{
if(inVideoUri.substr(inVideoUri.find_last_of(".") + 1) == "mp4"||(cameNo = stoi(inVideoUri)) >= 0) {
bInputIsImage = false;
}
}catch(Exception &e) {
}
if(!bInputIsImage) {
if((cameNo == -1 && !cap.open(inVideoUri))|| (cameNo != -1 && !cap.open(cameNo)))
{
spdlog::error("{} failed to open input video {}", selfId, inVideoUri);
return -1;
exit(1);
}
}
ghc::filesystem::path p(outFile);
......@@ -221,9 +248,14 @@ public:
if((outFile.substr(outFile.find_last_of(".") + 1) == "jpg")) {
bOutputIsImg = true;
outFileBase = string(dir / p.stem());
spdlog::info("{} outFileBase {}", selfId, outFileBase);
spdlog::debug("{} outFileBase {}", selfId, outFileBase);
}
else {
if(bInputIsImage) {
spdlog::error("{} can't output image {} as video {}, invalid params combination", selfId, inVideoUri, outFile);
exit(1);
}
bOutputIsImg = false;
if(!video.open(outFile, VideoWriter::fourcc('M','J','P','G'), 28, Size(cap.get(CAP_PROP_FRAME_WIDTH), cap.get(CAP_PROP_FRAME_HEIGHT)))) {
spdlog::error("{} failed to open output video {}", selfId, outFile);
......@@ -231,7 +263,7 @@ public:
}
}
spdlog::info("{} try to process video {} to {}", selfId, inVideoUri, outFile);
spdlog::debug("{} try to process video {} to {}", selfId, inVideoUri, outFile);
unsigned long frameCnt = 0;
unsigned long detCnt = 0, skipCnt = 0;
......@@ -242,14 +274,22 @@ public:
break;
}
if(bInputIsImage){
frame = imread(inVideoUri);
if(!frame.data){
spdlog::error("{} failed to read image {}", selfId, inVideoUri);
exit(1);
}
cmdStop = true;
}
else{
if(!cap.read(frame)) {
spdlog::info("{} done reading frame from {}", selfId, inVideoUri);
break;
}
frameCnt++;
if(frameCnt %100 == 0)
spdlog::info("framecnt {}", frameCnt);
spdlog::debug("framecnt {}", frameCnt);
if(frameCnt % 30 != 0 ){
continue;
......@@ -259,38 +299,42 @@ public:
if (frame.empty()) {
continue;
}
}
vector<tuple<string, double, Rect>> ret = process(frame, &outFrame, true);
if(cb == nullptr) {
if(ret.size() == 0 && bOutputIsImg) {
// no detection
if(numLogSkip == 0|| skipCnt % numLogSkip == 0) {
spdlog::info("{} no valid object detected skipped frame count {}", selfId, skipCnt);
spdlog::debug("{} no valid object detected skipped frame count {}", selfId, skipCnt);
}
skipCnt++;
continue;
}
if(bHumanExit){
if (bOutputIsImg) {
if(bHumanOnly){
for(auto &[s, c, r]:ret) {
if (s == "person"){
string ofname = outFileBase + "_person.jpg";
auto ms = chrono::duration_cast<chrono::milliseconds >(chrono::system_clock::now().time_since_epoch()).count();
string ofname = outFileBase + "_person_" + to_string(ms) + ".jpg";
imwrite(ofname, outFrame);
spdlog::info("found human {} x: {}, y: {}, w: {}, h: {}", c, r.x, r.y, r.width, r.height);
if(!bContinue){
cmdStop = true;
break;
}
}
}
if (bOutputIsImg) {
}else{
if(wrapNum > 0) {
detCnt = detCnt % wrapNum;
}
string ofname = outFileBase + to_string(detCnt) + ".jpg";
imwrite(ofname, outFrame);
detCnt++;
}
}
else {
video.write(outFrame);
}
......@@ -299,6 +343,7 @@ public:
}
}
spdlog::info("{} done processing {}", selfId, inVideoUri);
cap.release();
if(!bOutputIsImg) video.release();
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论