master

分支 (1)

管理

管理

master

yolov5-openvion
/
yolo_object_detection.cpp

#include <iostream>
#include "yolo_object_detection.h"

using namespace std;
using namespace cv;
using namespace cv::dnn;
using namespace InferenceEngine;

std::vector<float> anchors = {
	10,13, 16,30, 33,23,
	30,61, 62,45, 59,119,
	116,90, 156,198, 373,326
};

int get_anchor_index(int scale_w, int scale_h) {
	if (scale_w == 20) {
		return 12;
	}
	if (scale_w == 40) {
		return 6;
	}
	if (scale_w == 80) {
		return 0;
	}
	return -1;
}

float get_stride(int scale_w, int scale_h) {
	if (scale_w == 20) {
		return 32.0;
	}
	if (scale_w == 40) {
		return 16.0;
	}
	if (scale_w == 80) {
		return 8.0;
	}
	return -1;
}

float sigmoid_function(float a)
{
	float b = 1. / (1. + exp(-a));
	return b;
}

void YOLOObjectDetection::detect(std::string xml, std::string bin, cv::Mat frame, int camera_index) {
	VideoCapture cap;

	if (camera_index == 0) {
		cap.open(0);
	}


	if (frame.empty()) {
		cap.read(frame);
	}
	int image_height = frame.rows;
	int image_width = frame.cols;

	// ����IE���, ��ѯ֧��Ӳ���豸
	InferenceEngine::Core ie;
	vector<string> availableDevices = ie.GetAvailableDevices();
	for (int i = 0; i < availableDevices.size(); i++) {
		printf("supported device name : %s \n", availableDevices[i].c_str());
	}
	//  ���ؼ��ģ��
	auto network = ie.ReadNetwork(xml, bin);
	// auto network = ie.ReadNetwork(xml);

	// �������������������Ϣ
	InferenceEngine::InputsDataMap input_info(network.getInputsInfo());
	InferenceEngine::OutputsDataMap output_info(network.getOutputsInfo());

	// ���������ʽ
	for (auto &item : input_info) {
		auto input_data = item.second;
		input_data->setPrecision(Precision::FP32);
		input_data->setLayout(Layout::NCHW);
		input_data->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR);
		input_data->getPreProcess().setColorFormat(ColorFormat::RGB);
	}

	// ���������ʽ
	for (auto &item : output_info) {
		auto output_data = item.second;
		output_data->setPrecision(Precision::FP32);
	}
	auto executable_network = ie.LoadNetwork(network, "CPU");

	// �����ƶ�ͼ
	auto infer_request = executable_network.CreateInferRequest();
	float scale_x = image_width / 640.0;
	float scale_y = image_height / 640.0;

	if (camera_index == -1) {
		inferAndOutput(frame, infer_request, input_info, output_info, scale_x, scale_y);
		cv::imshow("OpenVINO2021R2+YOLOv5������", frame);
	}
	else {
		while (true) {
			bool ret = cap.read(frame);
			if (frame.empty()) {
				break;
			}
			inferAndOutput(frame, infer_request, input_info, output_info, scale_x, scale_y);
			cv::imshow("YOLOv5s+OpenVINO2021R02+Demo", frame);
			char c = cv::waitKey(1);
			if (c == 27) {
				break;
			}
		}
	}
	waitKey(0);
	destroyAllWindows();
}

void YOLOObjectDetection::inferAndOutput(cv::Mat &frame, InferenceEngine::InferRequest &infer_request,
	InferenceEngine::InputsDataMap & input_info, InferenceEngine::OutputsDataMap &output_info, float sx, float sy) {
	int64 start = getTickCount();

	// �������������
	vector<Rect> boxes;
	vector<int> classIds;
	vector<float> confidences;
	/** Iterating over all input blobs **/
	for (auto & item : input_info) {
		auto input_name = item.first;

		/** Getting input blob **/
		auto input = infer_request.GetBlob(input_name);
		size_t num_channels = input->getTensorDesc().getDims()[1];
		size_t h = input->getTensorDesc().getDims()[2];
		size_t w = input->getTensorDesc().getDims()[3];
		size_t image_size = h*w;
		Mat blob_image;
		resize(frame, blob_image, Size(w, h));
		cvtColor(blob_image, blob_image, COLOR_BGR2RGB);

		// NCHW
		float* data = static_cast<float*>(input->buffer());
		for (size_t row = 0; row < h; row++) {
			for (size_t col = 0; col < w; col++) {
				for (size_t ch = 0; ch < num_channels; ch++) {
					data[image_size*ch + row*w + col] = float(blob_image.at<Vec3b>(row, col)[ch]) / 255.0;
				}
			}
		}
	}

	// ִ��Ԥ��
	infer_request.Infer();

	for (auto &item : output_info) {
		auto output_name = item.first;
		auto output = infer_request.GetBlob(output_name);

		const float* output_blob = static_cast<PrecisionTrait<Precision::FP32>::value_type*>(output->buffer());
		const SizeVector outputDims = output->getTensorDesc().getDims();
		const int out_n = outputDims[0];
		const int out_c = outputDims[1];
		const int side_h = outputDims[2];
		const int side_w = outputDims[3];
		const int side_data = outputDims[4];

		float stride = get_stride(side_h, side_h);
		int anchor_index = get_anchor_index(side_h, side_h);
		int side_square = side_h*side_w;
		int side_data_square = side_square*side_data;
		int side_data_w = side_w*side_data;

		for (int i = 0; i < side_square; ++i) {
			for (int c = 0; c < out_c; c++) {
				int row = i / side_h;
				int col = i % side_h;
				int object_index = c*side_data_square + row*side_data_w + col*side_data;

				// ��ֵ����
				float conf = sigmoid_function(output_blob[object_index + 4]);
				if (conf < 0.25) {
					continue;
				}

				// ����cx, cy, width, height
				float x = (sigmoid_function(output_blob[object_index]) * 2 - 0.5 + col)*stride;
				float y = (sigmoid_function(output_blob[object_index + 1]) * 2 - 0.5 + row)*stride;
				float w = pow(sigmoid_function(output_blob[object_index + 2]) * 2, 2)*anchors[anchor_index + c * 2];
				float h = pow(sigmoid_function(output_blob[object_index + 3]) * 2, 2)*anchors[anchor_index + c * 2 + 1];
				float max_prob = -1;
				int class_index = -1;

				// �������
//				for (int d = 5; d < 85; d++)
                for (int d = 5; d < 14; d++) {
					float prob = sigmoid_function(output_blob[object_index + d]);
					if (prob > max_prob) {
						max_prob = prob;
						class_index = d - 5;
					}
				}

				// ת��Ϊtop-left, bottom-right����
				int x1 = saturate_cast<int>((x - w / 2) * sx);  // top left x
				int y1 = saturate_cast<int>((y - h / 2) * sy);  // top left y
				int x2 = saturate_cast<int>((x + w / 2) * sx);  // bottom right x
				int y2 = saturate_cast<int>((y + h / 2) * sy); // bottom right y

				// �������
				classIds.push_back(class_index);
				confidences.push_back((float)conf);
				boxes.push_back(Rect(x1, y1, x2 - x1, y2 - y1));
			}
		}
	}

	vector<int> indices;
	cv::dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indices);
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];
		Rect box = boxes[idx];
		rectangle(frame, box, Scalar(140, 199, 0), 4, 8, 0);
	}
	float fps = getTickFrequency() / (getTickCount() - start);
	float time = (getTickCount() - start) / getTickFrequency();

	ostringstream ss;
	ss << "FPS : " << fps << " detection time: " << time * 1000 << " ms";
	cv::putText(frame, ss.str(), Point(20, 50), 0, 1.0, Scalar(0, 0, 255), 2);
}

bool YOLOObjectDetection::isAallready()
{
	if(m_input_info.size()>0&&m_output_info.size()>0)
		return  true;
	else
		return  false;
}


void YOLOObjectDetection::ReadNetwork(std::string xml, std::string bin)
{
	//��ѯ֧�ֵ�Ӳ���豸
	vector<string> availableDevices = m_ie.GetAvailableDevices();
	for (int i = 0; i < availableDevices.size(); i++)
	{
		printf("supported device names : %s \n", availableDevices[i].c_str());
	}
	//���ؼ��ģ��
	auto network = m_ie.ReadNetwork(xml, bin);
	// �������������������Ϣ
	m_input_info = InferenceEngine::InputsDataMap(network.getInputsInfo());
	m_output_info = InferenceEngine::OutputsDataMap(network.getOutputsInfo());
	// ���������ʽ
	for (auto &item : m_input_info) {
		auto input_data = item.second;
		input_data->setPrecision(Precision::FP32);
		input_data->setLayout(Layout::NCHW);
		input_data->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR);
		input_data->getPreProcess().setColorFormat(ColorFormat::RGB);
	}
	// ���������ʽ
	for (auto &item : m_output_info)
	{
		auto output_data = item.second;
		output_data->setPrecision(Precision::FP32);
	}
	m_executable_network = m_ie.LoadNetwork(network, "CPU");
	printf("�������");
	printf("i=%ld,out=%ld",m_input_info.size(),m_output_info.size());

}

void YOLOObjectDetection::DetectNetWork(cv::Mat &frame, float threshold)
{

		int64 start = getTickCount();
		auto infer_request = m_executable_network.CreateInferRequest();
		int image_height = frame.rows;
		int image_width = frame.cols;

		float scale_x = image_width / 640.0;
		float scale_y = image_height / 640.0;
		// �������������
		vector<Rect> boxes;
		vector<int> classIds;
		vector<float> confidences;
		/** Iterating over all input blobs **/
		for (auto & item : m_input_info)
		{
			auto input_name = item.first;
			/** Getting input blob **/
			auto input = infer_request.GetBlob(input_name);
			size_t num_channels = input->getTensorDesc().getDims()[1];
			size_t h = input->getTensorDesc().getDims()[2];
			size_t w = input->getTensorDesc().getDims()[3];
			size_t image_size = h * w;
			Mat blob_image;
			resize(frame, blob_image, Size(w, h));
			cvtColor(blob_image, blob_image, COLOR_BGR2RGB);

			// NCHW
			float* data = static_cast<float*>(input->buffer());
			for (size_t row = 0; row < h; row++)
			{
				for (size_t col = 0; col < w; col++)
				{
					for (size_t ch = 0; ch < num_channels; ch++)
					{
						data[image_size*ch + row * w + col] = float(blob_image.at<Vec3b>(row, col)[ch]) / 255.0;
					}
				}
			}
		}

		// ִ��Ԥ��
		infer_request.Infer();

		for (auto &item : m_output_info)
		{
			auto output_name = item.first;
			auto output = infer_request.GetBlob(output_name);

			const float* output_blob = static_cast<PrecisionTrait<Precision::FP32>::value_type*>(output->buffer());
			const SizeVector outputDims = output->getTensorDesc().getDims();
			//printf("vector size %d %d %d %d %d:",outputDims[0],outputDims[1],outputDims[2],outputDims[3],outputDims[4]);
			const int out_n = outputDims[0];
			const int out_c = outputDims[1];
			const int side_h = outputDims[2];
			const int side_w = outputDims[3];
			const int side_data = outputDims[4];

			float stride = get_stride(side_h, side_h);
			int anchor_index = get_anchor_index(side_h, side_h);
			int side_square = side_h * side_w;
			int side_data_square = side_square * side_data;
			int side_data_w = side_w * side_data;

			//80������85,һ������6,n������n+5
			for (int i = 0; i < side_square; ++i)
			{
				for (int c = 0; c < out_c; c++)
				{
					int row = i / side_h;
					int col = i % side_h;
					int object_index = c * side_data_square + row * side_data_w + col * side_data;

					// ��ֵ����
					//ע��˴�����ֵ�ǿ������prob�˻�����ֵ
					float conf = sigmoid_function(output_blob[object_index + 4]);
					if (conf < threshold)
					{
						continue;
					}

					// ����cx, cy, width, height
					float x = (sigmoid_function(output_blob[object_index]) * 2 - 0.5 + col)*stride;
					float y = (sigmoid_function(output_blob[object_index + 1]) * 2 - 0.5 + row)*stride;
					float w = pow(sigmoid_function(output_blob[object_index + 2]) * 2, 2)*anchors[anchor_index + c * 2];
					float h = pow(sigmoid_function(output_blob[object_index + 3]) * 2, 2)*anchors[anchor_index + c * 2 + 1];
					float max_prob = -1;
					int class_index = -1;

					// �������
					for (int d = 5; d < 14; d++)
					{
						float prob = sigmoid_function(output_blob[object_index + d]);
						if (prob > max_prob)
						{
							max_prob = prob;
							class_index = d ;
						}
					}

					// ת��Ϊtop-left, bottom-right����
					int x1 = saturate_cast<int>((x - w / 2) * scale_x);  // top left x
					int y1 = saturate_cast<int>((y - h / 2) * scale_y);  // top left y
					int x2 = saturate_cast<int>((x + w / 2) * scale_x);  // bottom right x
					int y2 = saturate_cast<int>((y + h / 2) * scale_y); // bottom right y

					// �������
					classIds.push_back(class_index);
					confidences.push_back((float)conf);
					boxes.push_back(Rect(x1, y1, x2 - x1, y2 - y1));
				}
			}
		}

		vector<int> indices;
		cv::dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indices);

		float fps = getTickFrequency() / (getTickCount() - start);
		float time = (getTickCount() - start) / getTickFrequency();

		for (size_t i = 0; i < indices.size(); ++i)
		{
			int idx = indices[i];
			Rect box = boxes[idx];
			rectangle(frame, box, Scalar(140, 199, 0), 4, 8, 0);
			int calss = classIds[idx];
			float score = confidences[idx];
			string str = to_string(calss) + ":" + to_string(score);
			cv::putText(frame, str, Point(box.x, box.y), 0, 1.0, Scalar(0, 0, 255), 2);
		}
		ostringstream ss;
		ss << "FPS : " << fps << " detection time: " << time * 1000 << " ms";
		cv::putText(frame, ss.str(), Point(20, 50), 0, 1.0, Scalar(0, 0, 255), 2);
		printf("Find : %ld \n", indices.size());
		cv::imshow("YOLOv5s+OpenVINO2021R02+Demo", frame);
		char c = cv::waitKey(0);
}
void YOLOObjectDetection::drawBboxLabels(cv::Mat &src, cv::Rect &rect, int &classInx,int & cols, int & rows) {
    std::cout << rect.width  << "x" << rect.height << std::endl;
    int xmin = ((float)cols/640)*rect.x;
    int ymin = ((float)rows/640)*rect.y ;//+ 32;
    float yminf = (rows/640)*rect.y;
//    cout <<"ymin: " << ymin  << " yminf: " <<yminf <<endl;
    int width = ((float)cols/640)*rect.width;
    int height = ((float)rows/640)*rect.height;
    cv::Rect rect1(xmin,ymin,width,height);
//    //cv::Rect rect1(xmin,ymin,width,height);
//    if(classInx == 0){
//        cv::rectangle(src, rect1, colors[0],1, LINE_8,0);
//        cv::putText(src,classNames[0], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[0], 1);
//    }
//    else if(classInx == 1){
//        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
//        cv::putText(src,classNames[1], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[1], 1);
//    }
//    else if(classInx == 2){
//        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
//        cv::putText(src,classNames[2], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[2], 1);
//    }
//    else if(classInx == 3){
//        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
//        cv::putText(src,classNames[3], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[3], 1);
//    }
//    else if(classInx == 4){
//        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
//        cv::putText(src,classNames[4], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[4], 1);
//    }
//    cout <<"classInx: " << classInx;
    if(classInx == 0){
        cv::rectangle(src, rect1, colors[0],1, LINE_8,0);
        cv::putText(src,this->classNames[0], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[0], 1);
    }
    else if(classInx == 1){
        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
        cv::putText(src,this->classNames[1], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[1], 1);
    }
    else if(classInx == 2){
        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
        cv::putText(src,this->classNames[2], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[2], 1);
    }
    else if(classInx == 3){
        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
        cv::putText(src,this->classNames[3], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[3], 1);
    }
    else if(classInx == 4){
        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
        cv::putText(src,this->classNames[4], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[4], 1);
    }
    else if(classInx == 5){
        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
        cv::putText(src,this->classNames[5], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[5], 1);
    }
    else if(classInx == 6){
        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
        cv::putText(src,this->classNames[6], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[6], 1);
    }
    else if(classInx == 7){
        cv::rectangle(src, rect1, colors[1],1, LINE_8,0);
        cv::putText(src,this->classNames[7], cv::Point(xmin, ymin-2),  cv::FONT_HERSHEY_DUPLEX, 0.7, colors[7], 1);
    }
    else if(classInx == 8) {
        cv::rectangle(src, rect1, colors[1], 1, LINE_8, 0);
        cv::putText(src, this->classNames[8], cv::Point(xmin, ymin - 2), cv::FONT_HERSHEY_DUPLEX, 0.7, colors[8], 1);
    }
}