diff --git a/models/person_reid_youtureid/CMakeLists.txt b/models/person_reid_youtureid/CMakeLists.txt new file mode 100644 index 00000000..b8745c63 --- /dev/null +++ b/models/person_reid_youtureid/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.24.0) +project(opencv_zoo_person_reid_youtureid) + +set(OPENCV_VERSION "4.10.0") +set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation") + +# Find OpenCV +find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH}) + +add_executable(demo demo.cpp) +target_link_libraries(demo ${OpenCV_LIBS}) \ No newline at end of file diff --git a/models/person_reid_youtureid/README.md b/models/person_reid_youtureid/README.md index 362738e7..650b1ab3 100644 --- a/models/person_reid_youtureid/README.md +++ b/models/person_reid_youtureid/README.md @@ -10,6 +10,7 @@ This model is provided by Tencent Youtu Lab [[Credits]](https://github.com/openc Run the following command to try the demo: +### Python ```shell python demo.py --query_dir /path/to/query --gallery_dir /path/to/gallery -v @@ -17,6 +18,18 @@ python demo.py --query_dir /path/to/query --gallery_dir /path/to/gallery -v python demo.py --help ``` +### C++ +```shell +# A typical and default installation path of OpenCV is /usr/local +cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation . +cmake --build build + +./build/demo --query_dir=/path/to/query --gallery_dir=/path/to/gallery -v + +# get help regarding various parameters +./build/demo --help +``` + ### License All files in this directory are licensed under [Apache 2.0 License](./LICENSE). diff --git a/models/person_reid_youtureid/demo.cpp b/models/person_reid_youtureid/demo.cpp new file mode 100644 index 00000000..bac7cc1f --- /dev/null +++ b/models/person_reid_youtureid/demo.cpp @@ -0,0 +1,308 @@ +#include +#include "opencv2/dnn.hpp" +#include +#include +#include +#include +#include + + +// YoutuReID class for person re-identification +class YoutuReID { +public: + YoutuReID(const std::string& model_path, + const cv::Size& input_size = cv::Size(128, 256), + int output_dim = 768, + const cv::Scalar& mean = cv::Scalar(0.485, 0.456, 0.406), + const cv::Scalar& std = cv::Scalar(0.229, 0.224, 0.225), + int backend_id = 0, + int target_id = 0) + : model_path_(model_path), input_size_(input_size), + output_dim_(output_dim), mean_(mean), std_(std), + backend_id_(backend_id), target_id_(target_id) + { + + model_ = cv::dnn::readNet(model_path_); + model_.setPreferableBackend(backend_id_); + model_.setPreferableTarget(target_id_); + } + + void setBackendAndTarget(int backend_id, int target_id) { + backend_id_ = backend_id; + target_id_ = target_id; + model_.setPreferableBackend(backend_id_); + model_.setPreferableTarget(target_id_); + } + + void setInputSize(const cv::Size& input_size) { + input_size_ = input_size; + } + + // Preprocess image by resizing, normalizing, and creating a blob + cv::Mat preprocess(const cv::Mat& image) { + cv::Mat img; + cv::cvtColor(image, img, cv::COLOR_BGR2RGB); + img.convertTo(img, CV_32F, 1.0 / 255.0); + + // Normalize each channel separately + std::vector channels(3); + cv::split(img, channels); + channels[0] = (channels[0] - mean_[0]) / std_[0]; + channels[1] = (channels[1] - mean_[1]) / std_[1]; + channels[2] = (channels[2] - mean_[2]) / std_[2]; + cv::merge(channels, img); + + return cv::dnn::blobFromImage(img); + } + + // Run inference to extract feature vector + cv::Mat infer(const cv::Mat& image) { + cv::Mat input_blob = preprocess(image); + model_.setInput(input_blob); + cv::Mat features = model_.forward(); + + if (features.dims == 4 && features.size[2] == 1 && features.size[3] == 1) { + features = features.reshape(1, {1, features.size[1]}); + } + + return features; + } + + // Perform query, comparing each query image to each gallery image + std::vector> query(const std::vector& query_img_list, + const std::vector& gallery_img_list, + int topK = 5) { + std::vector query_features_list, gallery_features_list; + cv::Mat query_features, gallery_features; + + for (size_t i = 0; i < query_img_list.size(); ++i) { + cv::Mat feature = infer(query_img_list[i]); + query_features_list.push_back(feature.clone()); + } + cv::vconcat(query_features_list, query_features); + normalizeFeatures(query_features); + + for (size_t i = 0; i < gallery_img_list.size(); ++i) { + cv::Mat feature = infer(gallery_img_list[i]); + gallery_features_list.push_back(feature.clone()); + } + cv::vconcat(gallery_features_list, gallery_features); + normalizeFeatures(gallery_features); + + cv::Mat dist = query_features * gallery_features.t(); + return getTopK(dist, topK); + } + +private: + // Normalize feature vectors row-wise to unit length + void normalizeFeatures(cv::Mat& features) { + const float epsilon = 1e-6; + for (int i = 0; i < features.rows; ++i) { + cv::Mat featureRow = features.row(i); + float norm = cv::norm(featureRow, cv::NORM_L2); + if (norm < epsilon) { + norm = epsilon; + } + featureRow /= norm; + } + } + + // Retrieve Top-K indices from similarity matrix + std::vector> getTopK(const cv::Mat& dist, int topK) { + std::vector> indices(dist.rows); + + for (int i = 0; i < dist.rows; ++i) { + std::vector> sim_index_pairs; + for (int j = 0; j < dist.cols; ++j) { + sim_index_pairs.emplace_back(dist.at(i, j), j); + } + std::sort(sim_index_pairs.begin(), sim_index_pairs.end(), + [](const std::pair& a, const std::pair& b) { + return a.first > b.first; + }); + + for (int k = 0; k < topK && k < sim_index_pairs.size(); ++k) { + indices[i].push_back(sim_index_pairs[k].second); + } + } + return indices; + } + + std::string model_path_; + cv::Size input_size_; + int output_dim_; + cv::Scalar mean_, std_; + int backend_id_; + int target_id_; + cv::dnn::Net model_; +}; + +// Read images from directory and return a pair of image list and file list +std::pair, std::vector> readImagesFromDirectory(const std::string& img_dir, int w = 128, int h = 256) { + std::vector img_list; + std::vector file_list; + + std::vector file_names; + cv::glob(img_dir + "/*", file_names, false); + + for (size_t i = 0; i < file_names.size(); ++i) { + std::string file_name = file_names[i].substr(file_names[i].find_last_of("/\\") + 1); + cv::Mat img = cv::imread(file_names[i]); + if (!img.empty()) { + cv::resize(img, img, cv::Size(w, h)); + img_list.push_back(img); + file_list.push_back(file_name); + } + } + return std::make_pair(img_list, file_list); +} + +// Visualize query and gallery results by creating concatenated images +std::map visualize( + const std::map>& results, + const std::string& query_dir, + const std::string& gallery_dir, + const cv::Size& output_size = cv::Size(128, 384)) { + + std::map results_vis; + + for (std::map>::const_iterator it = results.begin(); it != results.end(); ++it) { + const std::string& query_file = it->first; + const std::vector& top_matches = it->second; + + cv::Mat query_img = cv::imread(query_dir + "/" + query_file); + if (query_img.empty()) continue; + + cv::resize(query_img, query_img, output_size); + cv::copyMakeBorder(query_img, query_img, 5, 5, 5, 5, + cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + cv::putText(query_img, "Query", cv::Point(10, 30), + cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2); + + cv::Mat concat_img = query_img; + + for (size_t i = 0; i < top_matches.size(); ++i) { + cv::Mat gallery_img = cv::imread(gallery_dir + "/" + top_matches[i]); + if (gallery_img.empty()) continue; + + cv::resize(gallery_img, gallery_img, output_size); + cv::copyMakeBorder(gallery_img, gallery_img, 5, 5, 5, 5, + cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255)); + cv::putText(gallery_img, "G" + std::to_string(i), cv::Point(10, 30), + cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2); + + cv::hconcat(concat_img, gallery_img, concat_img); + } + results_vis[query_file] = concat_img; + } + return results_vis; +} + +void printHelpMessage() { + std::cout << "usage: demo.cpp [-h] [--query_dir QUERY_DIR] [--gallery_dir GALLERY_DIR] " + << "[--backend_target BACKEND_TARGET] [--topk TOPK] [--model MODEL] [--save] [--vis]\n\n" + << "ReID baseline models from Tencent Youtu Lab\n\n" + << "optional arguments:\n" + << " -h, --help show this help message and exit\n" + << " --query_dir QUERY_DIR, -q QUERY_DIR\n" + << " Query directory.\n" + << " --gallery_dir GALLERY_DIR, -g GALLERY_DIR\n" + << " Gallery directory.\n" + << " --backend_target BACKEND_TARGET, -bt BACKEND_TARGET\n" + << " Choose one of the backend-target pair to run this demo: 0: (default) OpenCV implementation + " + "CPU, 1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU\n" + << " --topk TOPK Top-K closest from gallery for each query.\n" + << " --model MODEL, -m MODEL\n" + << " Path to the model.\n" + << " --save, -s Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in " + "case of camera input.\n" + << " --vis, -v Usage: Specify to open a new window to show results. Invalid in case of camera input.\n"; +} + +int main(int argc, char** argv) { + // CommandLineParser setup + cv::CommandLineParser parser(argc, argv, + "{help h | | Show help message.}" + "{query_dir q | | Query directory.}" + "{gallery_dir g | | Gallery directory.}" + "{backend_target bt | 0 | Choose one of the backend-target pair to run this demo: 0: (default) OpenCV implementation + CPU, " + "1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU}" + "{topk k | 10 | Top-K closest from gallery for each query.}" + "{model m | person_reid_youtu_2021nov.onnx | Path to the model.}" + "{save s | false | Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.}" + "{vis v | false | Usage: Specify to open a new window to show results. Invalid in case of camera input.}"); + + if (parser.has("help")) { + printHelpMessage(); + return 0; + } + + std::string query_dir = parser.get("query_dir"); + std::string gallery_dir = parser.get("gallery_dir"); + int backend_target = parser.get("backend_target"); + int topK = parser.get("topk"); + std::string model_path = parser.get("model"); + bool save_flag = parser.get("save"); + bool vis_flag = parser.get("vis"); + + if (!parser.check()) { + parser.printErrors(); + return 1; + } + + const std::vector> backend_target_pairs = { + {cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU}, + {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA}, + {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16}, + {cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU}, + {cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU} + }; + + int backend_id = backend_target_pairs[backend_target].first; + int target_id = backend_target_pairs[backend_target].second; + + YoutuReID reid(model_path, cv::Size(128, 256), 768, + cv::Scalar(0.485, 0.456, 0.406), + cv::Scalar(0.229, 0.224, 0.225), + backend_id, target_id); + + std::pair, std::vector> query_data = readImagesFromDirectory(query_dir); + std::pair, std::vector> gallery_data = readImagesFromDirectory(gallery_dir); + + std::vector> indices = reid.query(query_data.first, gallery_data.first, topK); + + std::map> results; + for (size_t i = 0; i < query_data.second.size(); ++i) { + std::vector top_matches; + for (int idx : indices[i]) { + top_matches.push_back(gallery_data.second[idx]); + } + results[query_data.second[i]] = top_matches; + std::cout << "Query: " << query_data.second[i] << "\n"; + std::cout << "\tTop-" << topK << " from gallery: "; + for (size_t j = 0; j < top_matches.size(); ++j) { + std::cout << top_matches[j] << " "; + } + std::cout << std::endl; + } + + std::map results_vis = visualize(results, query_dir, gallery_dir); + + if (save_flag) { + for (std::map::iterator it = results_vis.begin(); it != results_vis.end(); ++it) { + std::string save_path = "result-" + it->first; + cv::imwrite(save_path, it->second); + } + } + + if (vis_flag) { + for (std::map::iterator it = results_vis.begin(); it != results_vis.end(); ++it) { + cv::namedWindow("result-" + it->first, cv::WINDOW_AUTOSIZE); + cv::imshow("result-" + it->first, it->second); + cv::waitKey(0); + cv::destroyAllWindows(); + } + } + + return 0; +}