Computer/Coding
[opencv] 하드웨어 가속 비디오 디코딩 + YOLO
순박한시골청년
2021. 6. 21. 09:19
OpenCV cudacodec과 DNN 모듈을 이용한 비디오 디코딩 및 객체 검출
OpenCV 4.5.2 + Contrib 모듈
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
|
// C++ standard library
#include <array>
#include <chrono>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <numeric>
#include <queue>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
// C standard library
#include <stdio.h>
#include <time.h>
// Platform
#include <Windows.h>
// OpenCV
#include "opencv2/opencv_modules.hpp"
#include "opencv2/core.hpp"
#include "opencv2/cudacodec.hpp"
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/cudawarping.hpp>
using namespace std;
using namespace cv;
// Detection tuning values shared by every worker thread.
// Minimum per-class score for a box to be kept as an NMS candidate.
constexpr float CONFIDENCE_THRESHOLD = 0.0f;
// Overlap threshold handed to cv::dnn::NMSBoxes.
constexpr float NMS_THRESHOLD = 0.4f;
// Number of per-class score columns read from each detection row.
constexpr int NUM_CLASSES = 5;

// Palette for bounding boxes. It is indexed with `class % NUM_COLORS`, so with
// four entries and five classes the last class reuses the first colour.
const cv::Scalar colors[] = {
    cv::Scalar(0, 255, 255),
    cv::Scalar(255, 255, 0),
    cv::Scalar(0, 255, 0),
    cv::Scalar(255, 0, 0)
};
constexpr auto NUM_COLORS = sizeof(colors) / sizeof(*colors);
void gpuVideoThread(int threadNum, int channel4gpu, string videoPath, int bimshow)
{
if (threadNum < channel4gpu)
{
cv::cuda::setDevice(0);
}
else if (threadNum < channel4gpu * 2)
{
cv::cuda::setDevice(1);
}
else if (threadNum < channel4gpu * 3)
{
cv::cuda::setDevice(2);
}
else if (threadNum < channel4gpu * 4)
{
cv::cuda::setDevice(3);
}
// 비디오 디코더
cuda::GpuMat gImg;
cv::Ptr<cv::cudacodec::VideoReader> d_reader = cv::cudacodec::createVideoReader(videoPath);
// Detection용 Mat
cv::Mat frame, blob;
std::vector<cv::Mat> detections;
// fps 계산용
int frameCnt = 0;
int nPrevSec = -1;
SYSTEMTIME stNow;
// Yolo
auto net = cv::dnn::readNetFromDarknet("D:/test/352.cfg", "D:/test/352.weights");
std::vector<std::string> class_names;
class_names.push_back("person"); class_names.push_back("car"); class_names.push_back("truck"); class_names.push_back("motoby"); class_names.push_back("bus");
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
auto output_names = net.getUnconnectedOutLayersNames();
// 루프 시작
for (;;)
{
if (!d_reader->nextFrame(gImg))
break;
// fps 계산용
frameCnt++;
// gpu mat 상태로 연산
int imageSize = 352;
cv::cuda::cvtColor(gImg, gImg, COLOR_RGBA2RGB);
cv::cuda::resize(gImg, gImg, Size(imageSize, imageSize));
// 일반 Mat로 변환
gImg.download(frame);
// YOLO
cv::dnn::blobFromImage(frame, blob, 1.0/255, cv::Size(imageSize, imageSize), cv::Scalar(), true, false, CV_32F);
net.setInput(blob);
net.forward(detections, output_names);
// 01 ----------------------------------------
std::vector<int> indices[NUM_CLASSES];
std::vector<cv::Rect> boxes[NUM_CLASSES];
std::vector<float> scores[NUM_CLASSES];
for (auto& output : detections)
{
const auto num_boxes = output.rows;
for (int i = 0; i < num_boxes; i++)
{
auto x = output.at<float>(i, 0) * frame.cols;
auto y = output.at<float>(i, 1) * frame.rows;
auto width = output.at<float>(i, 2) * frame.cols;
auto height = output.at<float>(i, 3) * frame.rows;
cv::Rect rect(x - width / 2, y - height / 2, width, height);
for (int c = 0; c < NUM_CLASSES; c++)
{
auto confidence = *output.ptr<float>(i, 5 + c);
if (confidence >= CONFIDENCE_THRESHOLD)
{
boxes[c].push_back(rect);
scores[c].push_back(confidence);
}
}
}
}
for (int c = 0; c < NUM_CLASSES; c++)
cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]);
for (int c = 0; c < NUM_CLASSES; c++)
{
for (size_t i = 0; i < indices[c].size(); ++i)
{
const auto color = colors[c % NUM_COLORS];
auto idx = indices[c][i];
const auto& rect = boxes[c][idx];
cv::rectangle(frame, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3);
std::ostringstream label_ss;
label_ss << class_names[c] << ": " << std::fixed << std::setprecision(2) << scores[c][idx];
auto label = label_ss.str();
int baseline;
auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
cv::rectangle(frame, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED);
cv::putText(frame, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0));
}
}
// 이미지 출력
//resize(frame, frame, Size(160, 160));
if (bimshow == 1)
{
imshow(to_string(threadNum), frame);
}
// 시간 측정 종료 및 출력
::GetLocalTime(&stNow);
if (stNow.wSecond != nPrevSec)
{
nPrevSec = stNow.wSecond;
cout << frameCnt << " / ";
frameCnt = 0;
}
waitKey(1);
Sleep(30);
}
}
/// Launches the worker threads and waits for all of them to finish.
///
/// Command line (all four arguments or none):
///   <num_threads> <channels_per_gpu> <video_path> <show: 1|0>
/// With no arguments the built-in defaults below are used.
int main(int argc, char **argv)
{
    // An RTSP source can be used instead of a file, e.g.
    // "rtsp://<user>:<password>@<host>/profile2/media.smp"
    std::string videoPath = "D:/test/test.mp4";
    // Total number of worker threads.
    int num_threads = 8;
    // Number of channels (workers) handled per graphics card.
    int channel4grapich = 8;
    // 1 = show annotated frames in a window per worker.
    int bimshow = 1;

    if (argc == 5)
    {
        num_threads = std::atoi(argv[1]);
        channel4grapich = std::atoi(argv[2]);
        videoPath = argv[3];
        bimshow = std::atoi(argv[4]);
    }
    else if (argc != 1)
    {
        // Previously a wrong argument count was ignored without any hint.
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "program")
                  << " <num_threads> <channels_per_gpu> <video_path> <show:1|0>\n"
                  << "unexpected argument count, continuing with defaults\n";
    }

    // Threads are owned by value, so nothing is leaked once they are joined.
    std::vector<std::thread> workers;
    if (num_threads > 0)
    {
        workers.reserve(static_cast<std::size_t>(num_threads));
    }
    for (int i = 0; i < num_threads; i++)
    {
        workers.emplace_back(gpuVideoThread, i, channel4grapich, videoPath, bimshow);
    }
    for (std::thread& worker : workers)
    {
        worker.join();
    }
    return 0;
}
|
cs |