Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C++ Demo for person_reid_youtureid #277

Merged
merged 4 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions models/person_reid_youtureid/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Build script for the C++ person re-identification demo (demo.cpp).
cmake_minimum_required(VERSION 3.24.0)
project(opencv_zoo_person_reid_youtureid)

# OpenCV version requested from find_package() below.
set(OPENCV_VERSION "4.10.0")
# User-settable cache entry (-D OPENCV_INSTALLATION_PATH=...); empty by default.
set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")

# Find OpenCV (required); OPENCV_INSTALLATION_PATH is passed as a search hint.
find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})

# Build the `demo` executable from demo.cpp and link it against the OpenCV libraries.
add_executable(demo demo.cpp)
target_link_libraries(demo ${OpenCV_LIBS})
13 changes: 13 additions & 0 deletions models/person_reid_youtureid/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,26 @@ This model is provided by Tencent Youtu Lab [[Credits]](https://github.com/openc

Run the following command to try the demo:

### Python
```shell
python demo.py --query_dir /path/to/query --gallery_dir /path/to/gallery -v

# get help regarding various parameters
python demo.py --help
```

### C++
```shell
# A typical and default installation path of OpenCV is /usr/local
cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
cmake --build build

./build/demo --query_dir=/path/to/query --gallery_dir=/path/to/gallery -v

# get help regarding various parameters
./build/demo --help
```

### License

All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
Expand Down
308 changes: 308 additions & 0 deletions models/person_reid_youtureid/demo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
#include <algorithm>
#include <iostream>
#include <map>
#include <numeric>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/opencv.hpp>
#include "opencv2/dnn.hpp"


// YoutuReID: person re-identification on top of cv::dnn.
//
// The class loads a network with cv::dnn::readNet, turns BGR images into
// normalized blobs, extracts one feature row per image and ranks gallery
// images against query images by the dot product of L2-normalized features.
class YoutuReID {
public:
    // model_path : path handed to cv::dnn::readNet (which reports load failures itself).
    // input_size : size every image is brought to before inference.
    // output_dim : stored only; no method of this class reads it.
    // mean, std  : per-channel (R, G, B) normalization constants applied after
    //              scaling pixel values to [0, 1].
    // backend_id, target_id : forwarded to setPreferableBackend / setPreferableTarget.
    YoutuReID(const std::string& model_path,
              const cv::Size& input_size = cv::Size(128, 256),
              int output_dim = 768,
              const cv::Scalar& mean = cv::Scalar(0.485, 0.456, 0.406),
              const cv::Scalar& std = cv::Scalar(0.229, 0.224, 0.225),
              int backend_id = 0,
              int target_id = 0)
        : model_path_(model_path), input_size_(input_size),
          output_dim_(output_dim), mean_(mean), std_(std),
          backend_id_(backend_id), target_id_(target_id)
    {
        model_ = cv::dnn::readNet(model_path_);
        model_.setPreferableBackend(backend_id_);
        model_.setPreferableTarget(target_id_);
    }

    // Switch the DNN backend/target pair used for subsequent inference.
    void setBackendAndTarget(int backend_id, int target_id) {
        backend_id_ = backend_id;
        target_id_ = target_id;
        model_.setPreferableBackend(backend_id_);
        model_.setPreferableTarget(target_id_);
    }

    // Change the size images are resized to in preprocess().
    void setInputSize(const cv::Size& input_size) {
        input_size_ = input_size;
    }

    // Convert a BGR image into a normalized NCHW float blob.
    //
    // Steps: BGR -> RGB, resize to input_size_ when the image has a different
    // size, scale to [0, 1], then (x - mean) / std per channel.
    // Images that already match input_size_ are not resampled.
    cv::Mat preprocess(const cv::Mat& image) {
        cv::Mat img;
        cv::cvtColor(image, img, cv::COLOR_BGR2RGB);

        // Honour input_size_ so callers do not have to pre-resize their images.
        if (input_size_.width > 0 && input_size_.height > 0 && img.size() != input_size_) {
            cv::resize(img, img, input_size_);
        }
        img.convertTo(img, CV_32F, 1.0 / 255.0);

        // Normalize each channel separately.
        std::vector<cv::Mat> channels(3);
        cv::split(img, channels);
        for (int c = 0; c < 3; ++c) {
            channels[c] = (channels[c] - mean_[c]) / std_[c];
        }
        cv::merge(channels, img);

        return cv::dnn::blobFromImage(img);
    }

    // Run the network on one image and return its feature matrix.
    // A 4-D output of shape N x C x 1 x 1 is reshaped to a 1 x C row; any
    // other output shape is returned unchanged.
    cv::Mat infer(const cv::Mat& image) {
        cv::Mat input_blob = preprocess(image);
        model_.setInput(input_blob);
        cv::Mat features = model_.forward();

        if (features.dims == 4 && features.size[2] == 1 && features.size[3] == 1) {
            features = features.reshape(1, {1, features.size[1]});
        }

        return features;
    }

    // Rank gallery images for every query image.
    //
    // Returns one vector per query image holding up to topK gallery indices,
    // most similar first. When either list is empty there is nothing to rank,
    // so each query gets an empty ranking and the network is not run.
    std::vector<std::vector<int>> query(const std::vector<cv::Mat>& query_img_list,
                                        const std::vector<cv::Mat>& gallery_img_list,
                                        int topK = 5) {
        if (query_img_list.empty() || gallery_img_list.empty()) {
            return std::vector<std::vector<int>>(query_img_list.size());
        }

        const cv::Mat query_features = extractFeatures(query_img_list);
        const cv::Mat gallery_features = extractFeatures(gallery_img_list);

        // Rows are unit length, so the product holds pairwise cosine similarities.
        const cv::Mat dist = query_features * gallery_features.t();
        return getTopK(dist, topK);
    }

private:
    // Stack the L2-normalized feature row of every image into one matrix.
    cv::Mat extractFeatures(const std::vector<cv::Mat>& images) {
        std::vector<cv::Mat> rows;
        rows.reserve(images.size());
        for (const cv::Mat& image : images) {
            // Deep copy each result; NOTE(review): presumably forward() may
            // reuse its output buffer between calls - confirm.
            rows.push_back(infer(image).clone());
        }

        cv::Mat features;
        cv::vconcat(rows, features);
        normalizeFeatures(features);
        return features;
    }

    // Normalize feature vectors row-wise to unit length (in place).
    // Norms below epsilon are clamped to avoid dividing by ~0.
    void normalizeFeatures(cv::Mat& features) {
        const double epsilon = 1e-6;
        for (int i = 0; i < features.rows; ++i) {
            cv::Mat featureRow = features.row(i);  // header sharing data with `features`
            const double norm = std::max(cv::norm(featureRow, cv::NORM_L2), epsilon);
            featureRow /= norm;
        }
    }

    // Retrieve Top-K column indices per row of a float similarity matrix.
    // Higher similarity ranks first; equal similarities are ordered by the
    // lower index so the output is deterministic. A negative topK yields
    // empty rankings and topK larger than the column count is clamped.
    std::vector<std::vector<int>> getTopK(const cv::Mat& dist, int topK) {
        std::vector<std::vector<int>> indices(dist.rows);
        const int keep = std::max(0, std::min(topK, dist.cols));

        for (int i = 0; i < dist.rows; ++i) {
            const float* scores = dist.ptr<float>(i);

            std::vector<int> order(dist.cols);
            std::iota(order.begin(), order.end(), 0);

            // Only the best `keep` entries have to be ordered.
            std::partial_sort(order.begin(), order.begin() + keep, order.end(),
                              [scores](int a, int b) {
                                  if (scores[a] != scores[b]) {
                                      return scores[a] > scores[b];
                                  }
                                  return a < b;
                              });
            order.resize(keep);
            indices[i] = std::move(order);
        }
        return indices;
    }

    std::string model_path_;
    cv::Size input_size_;
    int output_dim_;
    cv::Scalar mean_, std_;
    int backend_id_;
    int target_id_;
    cv::dnn::Net model_;
};

// Read images from directory and return a pair of image list and file list
std::pair<std::vector<cv::Mat>, std::vector<std::string>> readImagesFromDirectory(const std::string& img_dir, int w = 128, int h = 256) {
std::vector<cv::Mat> img_list;
std::vector<std::string> file_list;

std::vector<std::string> file_names;
cv::glob(img_dir + "/*", file_names, false);

for (size_t i = 0; i < file_names.size(); ++i) {
std::string file_name = file_names[i].substr(file_names[i].find_last_of("/\\") + 1);
cv::Mat img = cv::imread(file_names[i]);
if (!img.empty()) {
cv::resize(img, img, cv::Size(w, h));
img_list.push_back(img);
file_list.push_back(file_name);
}
}
return std::make_pair(img_list, file_list);
}

// Build one side-by-side panel per query: the query image followed by its
// matched gallery images, left to right.
//
// results     : query file name -> ordered gallery file names.
// query_dir   : directory the query files are read from.
// gallery_dir : directory the gallery files are read from.
// output_size : size each tile is resized to before a 5 px border is added.
//
// Queries whose image cannot be read are left out of the returned map;
// unreadable gallery images are skipped (their rank label is not reused).
std::map<std::string, cv::Mat> visualize(
    const std::map<std::string, std::vector<std::string>>& results,
    const std::string& query_dir,
    const std::string& gallery_dir,
    const cv::Size& output_size = cv::Size(128, 384)) {

    // Resize a tile, frame it with `frame_color` and write `caption` on it.
    auto decorate = [&output_size](cv::Mat& tile, const cv::Scalar& frame_color,
                                   const std::string& caption) {
        cv::resize(tile, tile, output_size);
        cv::copyMakeBorder(tile, tile, 5, 5, 5, 5,
                           cv::BORDER_CONSTANT, frame_color);
        cv::putText(tile, caption, cv::Point(10, 30),
                    cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2);
    };

    std::map<std::string, cv::Mat> panels;

    for (const auto& entry : results) {
        const std::string& query_name = entry.first;
        const std::vector<std::string>& matches = entry.second;

        cv::Mat strip = cv::imread(query_dir + "/" + query_name);
        if (strip.empty()) {
            continue;
        }
        decorate(strip, cv::Scalar(0, 0, 0), "Query");  // black frame marks the query

        for (size_t rank = 0; rank < matches.size(); ++rank) {
            cv::Mat tile = cv::imread(gallery_dir + "/" + matches[rank]);
            if (tile.empty()) {
                continue;
            }
            decorate(tile, cv::Scalar(255, 255, 255), "G" + std::to_string(rank));

            // Append the tile to the right-hand side of the strip.
            cv::hconcat(strip, tile, strip);
        }
        panels[query_name] = strip;
    }
    return panels;
}

// Print the command-line usage text to std::cout.
//
// Options are shown in the `--key=value` / `-k=value` form because that is
// the syntax cv::CommandLineParser (used by main) accepts; a space-separated
// `--key value` is not parsed as a key/value pair.
void printHelpMessage() {
    std::cout << "usage: demo [-h] [--query_dir=QUERY_DIR] [--gallery_dir=GALLERY_DIR] "
              << "[--backend_target=BACKEND_TARGET] [--topk=TOPK] [--model=MODEL] [--save] [--vis]\n\n"
              << "ReID baseline models from Tencent Youtu Lab\n\n"
              << "optional arguments:\n"
              << "  -h, --help            show this help message and exit\n"
              << "  --query_dir=QUERY_DIR, -q=QUERY_DIR\n"
              << "                        Query directory.\n"
              << "  --gallery_dir=GALLERY_DIR, -g=GALLERY_DIR\n"
              << "                        Gallery directory.\n"
              << "  --backend_target=BACKEND_TARGET, -bt=BACKEND_TARGET\n"
              << "                        Choose one of the backend-target pair to run this demo: 0: (default) OpenCV implementation + "
                 "CPU, 1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU\n"
              << "  --topk=TOPK, -k=TOPK  Top-K closest from gallery for each query (default: 10).\n"
              << "  --model=MODEL, -m=MODEL\n"
              << "                        Path to the model (default: person_reid_youtu_2021nov.onnx).\n"
              << "  --save, -s            Specify to save the visualized results as image files.\n"
              << "  --vis, -v             Specify to open a new window to show results.\n";
}

int main(int argc, char** argv) {
// CommandLineParser setup
cv::CommandLineParser parser(argc, argv,
"{help h | | Show help message.}"
"{query_dir q | | Query directory.}"
"{gallery_dir g | | Gallery directory.}"
"{backend_target bt | 0 | Choose one of the backend-target pair to run this demo: 0: (default) OpenCV implementation + CPU, "
"1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU}"
"{topk k | 10 | Top-K closest from gallery for each query.}"
"{model m | person_reid_youtu_2021nov.onnx | Path to the model.}"
"{save s | false | Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.}"
"{vis v | false | Usage: Specify to open a new window to show results. Invalid in case of camera input.}");

if (parser.has("help")) {
printHelpMessage();
return 0;
}

std::string query_dir = parser.get<std::string>("query_dir");
std::string gallery_dir = parser.get<std::string>("gallery_dir");
int backend_target = parser.get<int>("backend_target");
int topK = parser.get<int>("topk");
std::string model_path = parser.get<std::string>("model");
bool save_flag = parser.get<bool>("save");
bool vis_flag = parser.get<bool>("vis");

if (!parser.check()) {
parser.printErrors();
return 1;
}

const std::vector<std::pair<int, int>> backend_target_pairs = {
{cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},
{cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},
{cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16},
{cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU},
{cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU}
};

int backend_id = backend_target_pairs[backend_target].first;
int target_id = backend_target_pairs[backend_target].second;

YoutuReID reid(model_path, cv::Size(128, 256), 768,
cv::Scalar(0.485, 0.456, 0.406),
cv::Scalar(0.229, 0.224, 0.225),
backend_id, target_id);

std::pair<std::vector<cv::Mat>, std::vector<std::string>> query_data = readImagesFromDirectory(query_dir);
std::pair<std::vector<cv::Mat>, std::vector<std::string>> gallery_data = readImagesFromDirectory(gallery_dir);

std::vector<std::vector<int>> indices = reid.query(query_data.first, gallery_data.first, topK);

std::map<std::string, std::vector<std::string>> results;
for (size_t i = 0; i < query_data.second.size(); ++i) {
std::vector<std::string> top_matches;
for (int idx : indices[i]) {
top_matches.push_back(gallery_data.second[idx]);
}
results[query_data.second[i]] = top_matches;
std::cout << "Query: " << query_data.second[i] << "\n";
std::cout << "\tTop-" << topK << " from gallery: ";
for (size_t j = 0; j < top_matches.size(); ++j) {
std::cout << top_matches[j] << " ";
}
std::cout << std::endl;
}

std::map<std::string, cv::Mat> results_vis = visualize(results, query_dir, gallery_dir);

if (save_flag) {
for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) {
std::string save_path = "result-" + it->first;
cv::imwrite(save_path, it->second);
}
}

if (vis_flag) {
for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) {
cv::namedWindow("result-" + it->first, cv::WINDOW_AUTOSIZE);
cv::imshow("result-" + it->first, it->second);
cv::waitKey(0);
cv::destroyAllWindows();
}
}

return 0;
}