1 Star 0 Fork 0

dx/LightTrack-ncnn

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
LightTrack.cpp 12.45 KB
一键复制 编辑 原始数据 按行查看 历史
xiongzhuang 提交于 2023-03-31 16:05 . fix size penalty bug.
//
// Created by xiongzhuang on 2021/10/8.
//
#include "LightTrack.h"
#include "timer.h"
// Cheap approximation of exp(x).
// Instead of calling std::exp, the bit pattern of the result is assembled
// directly: x is scaled by log2(e) (1.4426950409), offset by a bias constant,
// multiplied by 2^23 and stored into the integer view of a float.
inline float fast_exp(float x) {
    union {
        uint32_t bits;
        float value;
    } pun{};
    // Evaluated in double precision because the scale literal is a double.
    const auto biased = 1.4426950409 * x + 126.93490512f;
    pun.bits = (1 << 23) * biased;
    return pun.value;
}
// Logistic function 1 / (1 + e^-x), evaluated with the fast_exp approximation.
inline float sigmoid(float x) {
    const float decay = fast_exp(-x);
    return 1.0f / (1.0f + decay);
}
// Padded size of a (width, height) pair: each side is enlarged by the mean of
// the two sides and the square root of the resulting area is returned.
static float sz_whFun(cv::Point2f wh) {
    const float pad = (wh.x + wh.y) * 0.5f;
    const float padded_w = wh.x + pad;
    const float padded_h = wh.y + pad;
    return std::sqrt(padded_w * padded_h);
}
// Per-element scale-change penalty term.
//
// For every predicted box (w[k], h[k]) the padded size
// sqrt((w + pad) * (h + pad)), with pad = (w + h) / 2, is divided by the
// reference size `sz`; the result t is folded to max(t, 1 / t) so that growing
// and shrinking are treated the same way.
//
// @param w   predicted widths, one per score-map cell
// @param h   predicted heights, same layout as `w`
// @param sz  reference padded size of the current target
// @return    one value per input element (min(w.size(), h.size()) entries)
//
// Every element is processed: the bounds are taken from the vector sizes
// rather than from int(sqrt(size)), which dropped trailing elements whenever
// the element count was not a perfect square.
static std::vector<float> sz_change_fun(const std::vector<float> &w, const std::vector<float> &h, float sz) {
    const std::size_t count = w.size() < h.size() ? w.size() : h.size();
    std::vector<float> change;
    change.reserve(count);
    for (std::size_t k = 0; k < count; ++k) {
        const float pad = (w[k] + h[k]) * 0.5f;
        const float t = std::sqrt((w[k] + pad) * (h[k] + pad)) / sz;
        change.push_back(std::max(t, 1.0f / t));
    }
    return change;
}
// Per-element aspect-ratio-change penalty term.
//
// The target's aspect ratio (target_sz.x / target_sz.y) is divided by each
// predicted box's ratio (w[k] / h[k]); the result t is folded to
// max(t, 1 / t) so that both directions of change are treated the same way.
//
// @param w          predicted widths, one per score-map cell
// @param h          predicted heights, same layout as `w`
// @param target_sz  current target size (x = width, y = height)
// @return           one value per input element (min(w.size(), h.size()) entries)
//
// Every element is processed: the bounds are taken from the vector sizes
// rather than from int(sqrt(size)), which dropped trailing elements whenever
// the element count was not a perfect square.
static std::vector<float> ratio_change_fun(const std::vector<float> &w, const std::vector<float> &h, cv::Point2f target_sz) {
    const std::size_t count = w.size() < h.size() ? w.size() : h.size();
    const float ratio = target_sz.x / target_sz.y;
    std::vector<float> change;
    change.reserve(count);
    for (std::size_t k = 0; k < count; ++k) {
        const float t = ratio / (w[k] / h[k]);
        change.push_back(std::max(t, 1.0f / t));
    }
    return change;
}
// Builds a tracker from two ncnn models.
//
// @param model_init    path prefix of the template ("init") network; load_model
//                      appends ".param" / ".bin" to it
// @param model_update  path prefix of the search ("update") network, same scheme
//
// score_size is derived from instance_size / total_stride before the models
// are loaded.
LightTrack::LightTrack(const char *model_init, const char *model_update) {
    score_size = int(round(this->instance_size / this->total_stride));
    // The raw C strings convert to std::string at the call site; no local
    // copies are needed.
    this->load_model(model_init, model_update);
}
// Nothing to release by hand: the destructor body is empty and members clean
// up through their own destructors.
LightTrack::~LightTrack() = default;
void LightTrack::init(const uint8_t *img, Bbox &box, int im_h , int im_w) {
ori_img_h = im_h;
ori_img_w = im_w;
this->target_sz.x = box.x1-box.x0;
this->target_sz.y = box.y1-box.y0;
this->target_pos.x = box.x0 + (box.x1-box.x0)/2;
this->target_pos.y = box.y0 + (box.y1-box.y0)/2;
std::cout << "init target pos: " << target_pos << std::endl;
std::cout << "init target_sz: " << target_sz << std::endl;
this->grids();
// 对模板图像而言:在第一帧以s_z为边长,以目标中心为中心点,截取图像补丁(如果超出第一帧的尺寸,用均值填充)。之后将其resize为127x127x3.成为模板图像
// context = 1/2 * (w+h) = 2*pad
float wc_z = target_sz.x + context_amount * (target_sz.x + target_sz.y);
float hc_z = target_sz.y + context_amount * (target_sz.x + target_sz.y);
// z_crop size = sqrt((w+2p)*(h+2p))
float s_z = round(sqrt(wc_z * hc_z)); // orignal size
cv::Mat z_crop;
cv::Mat img_(im_h, im_w, CV_8UC3, (void*)img, im_w*3);
z_crop = get_subwindow_tracking(img_, target_pos, exemplar_size, int(s_z));
// net init
ncnn::Extractor ex_init = net_init.create_extractor();
ex_init.set_light_mode(true);
ex_init.set_num_threads(6);
ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(z_crop.data, ncnn::Mat::PIXEL_BGR2RGB, z_crop.cols, z_crop.rows);
ncnn_img.substract_mean_normalize(this->mean_vals, this->norm_vals);
ex_init.input("input1", ncnn_img);
ex_init.extract("output.1", zf);
std::vector<float> hanning(score_size, 0); // 18
window.resize(score_size*score_size);
for (int i = 0; i < score_size; i++) {
float w = 0.5f - 0.5f * std::cos(2 * 3.1415926535898f * i / (score_size - 1));
hanning[i] = w;
}
for (int i = 0; i < score_size; i++) {
for (int j = 0; j < score_size; j++) {
window[i * score_size + j] = hanning[i] * hanning[j];
}
}
}
// Runs the update network on one search crop and refreshes the tracker state.
//
// @param x_crops  search-region crop (BGR pixels, converted to RGB for ncnn)
// @param scale_z  crop scale; predictions made in crop pixels are divided by
//                 it to get back to original-frame pixels
//
// Steps: normalise the crop, run the network with the stored template feature
// `zf`, decode the per-cell boxes, apply the size/ratio penalty and the Hann
// window, pick the best cell, then update target_pos and target_sz.
// target_sz is expected in crop scale on entry (track() multiplies it by
// scale_z before calling) and is converted back here.
void LightTrack::update(const cv::Mat &x_crops, float scale_z) {
    time_checker time2, time3, time4;

    time2.start();
    ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(x_crops.data, ncnn::Mat::PIXEL_BGR2RGB, x_crops.cols, x_crops.rows);
    ncnn_img.substract_mean_normalize(this->mean_vals, this->norm_vals);
    time2.stop();
    time2.show_distance("Update stage ---- input seting cost time");

    time3.start();
    // net backbone
    ncnn::Extractor ex_update = net_update.create_extractor();
    ex_update.set_light_mode(true);
    ex_update.set_num_threads(6);
#if NCNN_VULKAN and USE_GPU
    std::cout << NCNN_VULKAN << std::endl;
    ex_update.opt.use_vulkan_compute = true;
#endif
    ex_update.input("input1", zf);
    ex_update.input("input2", ncnn_img);
    ncnn::Mat cls_score, bbox_pred;
    ex_update.extract("output.1", cls_score); // original author's note: [c, w, h] = [1, 18, 18]
    ex_update.extract("output.2", bbox_pred); // original author's note: [c, w, h] = [4, 18, 18]
    time3.stop();
    time3.show_distance("Update stage ---- output cls_score and bbox_pred extracting cost time");

    const int cols = cls_score.w;
    const int rows = cls_score.h;
    const int count = cols * rows;
    if (count <= 0) {
        // No usable network output: leave the position alone and only undo the
        // crop-scale conversion of target_sz, instead of indexing empty buffers.
        target_sz.x = target_sz.x / scale_z;
        target_sz.y = target_sz.y / scale_z;
        return;
    }

    time4.start();
    // manually call sigmoid on the output
    const float *cls_score_data = (const float *) cls_score.data;
    std::vector<float> cls_score_sigmoid;
    cls_score_sigmoid.reserve(count);
    for (int i = 0; i < count; i++) {
        cls_score_sigmoid.push_back(sigmoid(cls_score_data[i]));
    }

    // Decode boxes: the four channels hold the distances from each grid point
    // to the left / top / right / bottom box edges.
    std::vector<float> pred_x1(count, 0), pred_y1(count, 0), pred_x2(count, 0), pred_y2(count, 0);
    float *bbox_pred_data1 = bbox_pred.channel(0);
    float *bbox_pred_data2 = bbox_pred.channel(1);
    float *bbox_pred_data3 = bbox_pred.channel(2);
    float *bbox_pred_data4 = bbox_pred.channel(3);
    std::vector<float> w(count, 0), h(count, 0);
    for (int i = 0; i < count; i++) {
        pred_x1[i] = this->grid_to_search_x[i] - bbox_pred_data1[i];
        pred_y1[i] = this->grid_to_search_y[i] - bbox_pred_data2[i];
        pred_x2[i] = this->grid_to_search_x[i] + bbox_pred_data3[i];
        pred_y2[i] = this->grid_to_search_y[i] + bbox_pred_data4[i];
        w[i] = pred_x2[i] - pred_x1[i];
        h[i] = pred_y2[i] - pred_y1[i];
    }

    // size penalty
    float sz_wh = sz_whFun(target_sz);
    std::vector<float> s_c = sz_change_fun(w, h, sz_wh);
    std::vector<float> r_c = ratio_change_fun(w, h, target_sz);
    std::vector<float> penalty(count, 0);
    for (int i = 0; i < count; i++) {
        penalty[i] = std::exp(-1 * (s_c[i] * r_c[i] - 1) * penalty_tk);
    }

    // window penalty + arg-max.
    // The flat index of the best cell is kept as an integer; decoding it
    // through float division could land one column too low and mixed up
    // rows/cols for non-square maps.
    std::vector<float> pscore(count, 0);
    int best = 0;
    float maxScore = 0;
    for (int i = 0; i < count; i++) {
        pscore[i] = (penalty[i] * cls_score_sigmoid[i]) * (1 - window_influence) + window[i] * window_influence;
        if (pscore[i] > maxScore) {
            maxScore = pscore[i];
            best = i;
        }
    }
    time4.stop();
    time4.show_distance("Update stage ---- postprocess cost time");
    std::cout << "pscore_window max score is: " << pscore[best] << std::endl;

    // to real size
    float pred_x1_real = pred_x1[best];
    float pred_y1_real = pred_y1[best];
    float pred_x2_real = pred_x2[best];
    float pred_y2_real = pred_y2[best];
    float pred_xs = (pred_x1_real + pred_x2_real) / 2;
    float pred_ys = (pred_y1_real + pred_y2_real) / 2;
    float pred_w = pred_x2_real - pred_x1_real;
    float pred_h = pred_y2_real - pred_y1_real;
    // Offset of the predicted centre from the crop centre.
    float diff_xs = pred_xs - instance_size / 2;
    float diff_ys = pred_ys - instance_size / 2;
    // Crop pixels -> original-frame pixels.
    diff_xs /= scale_z;
    diff_ys /= scale_z;
    pred_w /= scale_z;
    pred_h /= scale_z;
    target_sz.x = target_sz.x / scale_z;
    target_sz.y = target_sz.y / scale_z;

    // size learning rate: base rate scaled by the confidence of the best cell
    float lr_ = penalty[best] * cls_score_sigmoid[best] * lr;

    // size rate
    auto res_xs = float(target_pos.x + diff_xs);
    auto res_ys = float(target_pos.y + diff_ys);
    // Blend with the adaptive rate lr_ on both terms so the weights sum to 1
    // (the base rate `lr` was used for the prediction term before).
    float res_w = pred_w * lr_ + (1 - lr_) * target_sz.x;
    float res_h = pred_h * lr_ + (1 - lr_) * target_sz.y;

    target_pos.x = int(res_xs);
    target_pos.y = int(res_ys);
    target_sz.x = target_sz.x * (1 - lr_) + lr_ * res_w;
    target_sz.y = target_sz.y * (1 - lr_) + lr_ * res_h;
}
// Tracks the target in a new frame.
//
// @param img  pointer to the frame pixels; wrapped (not copied) as an
//             ori_img_h x ori_img_w CV_8UC3 image, i.e. the frame size given
//             to init()
//
// Crops a search region around the last known position, runs update() on it
// and clamps the resulting position and size to the frame.
void LightTrack::track(const uint8_t *img) {
    time_checker time1;

    float hc_z = target_sz.y + context_amount * (target_sz.x + target_sz.y);
    float wc_z = target_sz.x + context_amount * (target_sz.x + target_sz.y);
    float s_z = sqrt(wc_z * hc_z); // roi size
    float scale_z = exemplar_size / s_z; // 127/
    // Half the size difference between search and template inputs. Dividing by
    // 2.0f keeps the half pixel (e.g. (288 - 127) / 2 = 80.5) that an integer
    // division would truncate when both sizes are integers.
    float d_search = (instance_size - exemplar_size) / 2.0f;
    float pad = d_search / scale_z;
    float s_x = s_z + 2 * pad;

    time1.start();
    cv::Mat x_crop;
    cv::Mat img_(ori_img_h, ori_img_w, CV_8UC3, (void*)img, ori_img_w*3);
    x_crop = get_subwindow_tracking(img_, target_pos, instance_size, int(s_x));
    time1.stop();
    time1.show_distance("Update stage ---- get subwindow cost time");

    // update() expects target_sz in crop scale and converts it back itself.
    target_sz.x = target_sz.x * scale_z;
    target_sz.y = target_sz.y * scale_z;
    this->update(x_crop, scale_z);

    // Keep the position inside the frame and the size within [10, frame size].
    target_pos.x = std::max(0, min(ori_img_w, target_pos.x));
    target_pos.y = std::max(0, min(ori_img_h, target_pos.y));
    target_sz.x = float(std::max(10, min(ori_img_w, int(target_sz.x))));
    target_sz.y = float(std::max(10, min(ori_img_h, int(target_sz.y))));
    std::cout << "track target pos: " << target_pos << std::endl;
    std::cout << "track target_sz: " << target_sz << std::endl;
}
// Loads both ncnn networks from disk.
//
// @param model_init    path prefix of the init network; "<prefix>.param" and
//                      "<prefix>.bin" are loaded
// @param model_update  path prefix of the update network, same scheme
//
// All four files are always attempted, in the same order as before. A
// non-zero return code from ncnn is reported on std::cerr instead of being
// dropped silently; the function itself still returns normally.
void LightTrack::load_model(std::string model_init, std::string model_update) {
    const auto report = [](int status, const std::string &path) {
        if (status != 0) {
            std::cerr << "LightTrack: failed to load " << path << std::endl;
        }
    };

    const std::string init_param = model_init + ".param";
    const std::string init_bin = model_init + ".bin";
    const std::string update_param = model_update + ".param";
    const std::string update_bin = model_update + ".bin";

    report(this->net_init.load_param(init_param.c_str()), init_param);
    report(this->net_init.load_model(init_bin.c_str()), init_bin);
    report(this->net_update.load_param(update_param.c_str()), update_param);
    report(this->net_update.load_model(update_bin.c_str()), update_bin);
}
// Precomputes, for every cell of the score_size x score_size score map, the
// coordinate it corresponds to in the search image: column * total_stride for
// x and row * total_stride for y. Both tables are stored row-major.
void LightTrack::grids() {
    const int side = score_size; // 18 per the original author's note
    this->grid_to_search_x.resize(side * side, 0);
    this->grid_to_search_y.resize(side * side, 0);

    int cell = 0;
    for (int row = 0; row < side; ++row) {
        for (int col = 0; col < side; ++col, ++cell) {
            this->grid_to_search_x[cell] = col * total_stride;
            this->grid_to_search_y[cell] = row * total_stride;
        }
    }
}
// Cuts a square patch out of `im` and resizes it to the network input size.
//
// @param im           source frame
// @param pos          centre of the patch in frame coordinates
// @param model_sz     side length of the returned image
// @param original_sz  side length of the patch in the source frame
// @return             model_sz x model_sz image
//
// Parts of the patch that fall outside the frame are filled with zeros via
// cv::copyMakeBorder. The destination is left for copyMakeBorder to allocate;
// zero-filling a full padded frame beforehand was redundant work.
cv::Mat LightTrack::get_subwindow_tracking(cv::Mat im, cv::Point2f pos, int model_sz, int original_sz) {
    float c = (float) (original_sz + 1) / 2;
    int context_xmin = std::round(pos.x - c);
    int context_xmax = context_xmin + original_sz - 1;
    int context_ymin = std::round(pos.y - c);
    int context_ymax = context_ymin + original_sz - 1;

    // How far the patch sticks out of the frame on each side.
    const int left_pad = std::max(0, -context_xmin);
    const int top_pad = std::max(0, -context_ymin);
    const int right_pad = std::max(0, context_xmax - im.cols + 1);
    const int bottom_pad = std::max(0, context_ymax - im.rows + 1);

    // Shift the patch into the coordinate system of the padded frame.
    context_xmin += left_pad;
    context_xmax += left_pad;
    context_ymin += top_pad;
    context_ymax += top_pad;

    const cv::Rect roi(context_xmin, context_ymin,
                       context_xmax - context_xmin + 1, context_ymax - context_ymin + 1);

    cv::Mat im_path_original;
    if (top_pad > 0 || left_pad > 0 || right_pad > 0 || bottom_pad > 0) {
        cv::Mat te_im;
        cv::copyMakeBorder(im, te_im, top_pad, bottom_pad, left_pad, right_pad, cv::BORDER_CONSTANT, 0.f);
        im_path_original = te_im(roi);
    } else {
        im_path_original = im(roi);
    }

    cv::Mat im_path;
    cv::resize(im_path_original, im_path, cv::Size(model_sz, model_sz));
    return im_path;
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C++
1
https://gitee.com/dxxiandianzi/LightTrack-ncnn.git
git@gitee.com:dxxiandianzi/LightTrack-ncnn.git
dxxiandianzi
LightTrack-ncnn
LightTrack-ncnn
main

搜索帮助

0d507c66 1850385 C8b1a773 1850385