diff --git a/README.md b/README.md index a801234c..ba2cdfb0 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ # Last changes +* TensorRT 10 is supported + +* YOLOv11, YOLOv11-obb and YOLOv11-seg detector worked with TensorRT! Export pretrained Pytorch models [here (ultralytics/ultralytics)](https://github.com/ultralytics/ultralytics) to onnx format and run Multitarget-tracker with -e=6 example + * YOLOv8-obb detector worked with TensorRT! Export pretrained Pytorch models [here (ultralytics/ultralytics)](https://github.com/ultralytics/ultralytics) to onnx format and run Multitarget-tracker with -e=6 example * YOLOv10 detector worked with TensorRT! Export pretrained Pytorch models [here (THU-MIG/yolov10)](https://github.com/THU-MIG/yolov10) to onnx format and run Multitarget-tracker with -e=6 example diff --git a/data/settings_yolov11.ini b/data/settings_yolov11.ini new file mode 100644 index 00000000..c82412cd --- /dev/null +++ b/data/settings_yolov11.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 12 +# darknet_cudnn = 10 +# tensorrt = 11 +detector_backend = 12 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on 
two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this frames then it will be removed +max_skip_frames = 50 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 50 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11_obb.ini b/data/settings_yolov11_obb.ini new file mode 100644 index 00000000..599e5dd5 --- /dev/null +++ b/data/settings_yolov11_obb.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 12 +# darknet_cudnn = 10 +# tensorrt = 11 +detector_backend = 12 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/DOTA.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11_OBB + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# 
Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this frames then it will be removed +max_skip_frames = 50 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 50 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11_seg.ini b/data/settings_yolov11_seg.ini new file mode 100644 index 00000000..cb5c83ea --- /dev/null +++ b/data/settings_yolov11_seg.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 12 +# darknet_cudnn = 10 +# tensorrt = 11 +detector_backend = 12 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-seg.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-seg.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11Mask + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + 
+match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this frames then it will be removed +max_skip_frames = 50 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 50 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/example/examples.h b/example/examples.h index 1be76399..08b0fc67 100644 --- a/example/examples.h +++ b/example/examples.h @@ -652,7 +652,10 @@ class YoloTensorRTExample final : public VideoExample YOLOV8_OBB, YOLOv8Mask, YOLOv9, - YOLOv10 + YOLOv10, + YOLOv11, + YOLOv11_OBB, + YOLOv11Mask }; YOLOModels usedModel = YOLOModels::YOLOv9; switch (usedModel) diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 01d1102f..3da65967 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -142,6 +142,9 @@ bool OCVDNNDetector::Init(const config_t& config) dictNetType["YOLOV8Mask"] = ModelType::YOLOV8Mask; dictNetType["YOLOV9"] = ModelType::YOLOV9; dictNetType["YOLOV10"] = ModelType::YOLOV10; + dictNetType["YOLOV11"] = ModelType::YOLOV11; + dictNetType["YOLOV11_OBB"] = ModelType::YOLOV11_OBB; + dictNetType["YOLOV11Mask"] = ModelType::YOLOV11Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -348,7 +351,7 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr } else { - if (m_netType == ModelType::YOLOV8 || m_netType == ModelType::YOLOV5 || m_netType == ModelType::YOLOV9 || m_netType == ModelType::YOLOV10) + if (m_netType == ModelType::YOLOV8 || m_netType == ModelType::YOLOV5 || m_netType == ModelType::YOLOV9 || m_netType == ModelType::YOLOV10 || m_netType == ModelType::YOLOV11) { int rows = detections[0].size[1]; int dimensions = detections[0].size[2]; @@ -370,7 +373,7 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr for (int i = 0; i < rows; ++i) { - if (m_netType == ModelType::YOLOV8 || m_netType == ModelType::YOLOV9) + if (m_netType == ModelType::YOLOV8 || m_netType == ModelType::YOLOV9 || m_netType == ModelType::YOLOV11) { float* classes_scores = data + 4; diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index 79842ba2..44d91b4d 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -42,7 +42,10 @@ class 
OCVDNNDetector final : public BaseDetector YOLOV8_OBB, YOLOV8Mask, YOLOV9, - YOLOV10 + YOLOV10, + YOLOV11, + YOLOV11_OBB, + YOLOV11Mask }; cv::dnn::Net m_net; diff --git a/src/Detector/YoloTensorRTDetector.cpp b/src/Detector/YoloTensorRTDetector.cpp index a0ebeb44..d1cfb352 100644 --- a/src/Detector/YoloTensorRTDetector.cpp +++ b/src/Detector/YoloTensorRTDetector.cpp @@ -107,6 +107,9 @@ bool YoloTensorRTDetector::Init(const config_t& config) dictNetType["YOLOV8Mask"] = tensor_rt::YOLOV8Mask; dictNetType["YOLOV9"] = tensor_rt::YOLOV9; dictNetType["YOLOV10"] = tensor_rt::YOLOV10; + dictNetType["YOLOV11"] = tensor_rt::YOLOV11; + dictNetType["YOLOV11_OBB"] = tensor_rt::YOLOV11_OBB; + dictNetType["YOLOV11Mask"] = tensor_rt::YOLOV11Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -298,7 +301,7 @@ void YoloTensorRTDetector::Detect(const std::vector& frames, std::vect /// void YoloTensorRTDetector::CalcMotionMap(cv::Mat& frame) { - if (m_localConfig.net_type == tensor_rt::YOLOV7Mask || m_localConfig.net_type == tensor_rt::YOLOV8Mask) + if (m_localConfig.net_type == tensor_rt::YOLOV7Mask || m_localConfig.net_type == tensor_rt::YOLOV8Mask || m_localConfig.net_type == tensor_rt::YOLOV11Mask) { static std::vector color; if (color.empty()) diff --git a/src/Detector/tensorrt_yolo/CMakeLists.txt b/src/Detector/tensorrt_yolo/CMakeLists.txt index 30509d0e..d09a2243 100644 --- a/src/Detector/tensorrt_yolo/CMakeLists.txt +++ b/src/Detector/tensorrt_yolo/CMakeLists.txt @@ -43,7 +43,7 @@ SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) find_package(CUDNN REQUIRED) find_package(TensorRT REQUIRED) -message("TensorRT major version: " ${TensorRT_VERSION_MAJOR}) +message("TensorRT version: " ${TensorRT_VERSION}) include_directories(${OpenCV_INCLUDE_DIRS}) include_directories(${CUDA_INCLUDE_DIRS}) @@ -58,13 +58,17 @@ file(GLOB TENSORRT_CUDA_FILES *.cu) cuda_add_library(${libname_rt} SHARED ${TENSORRT_CUDA_FILES} ${TENSORRT_SOURCE_FILES} - ${TENSORRT_HEADER_FILES} -) + ${TENSORRT_HEADER_FILES}) #message("TensorRT OpenCV libraries:") #message("${OpenCV_LIBS}") #message(${OpenCV_DIR}) +set(TensorRT_LIBRARIES ${TensorRT_LIBRARIES} ${TensorRT_nvinfer_LIBRARY} ${TensorRT_nvinfer_plugin_LIBRARY} ${TensorRT_nvonnxparser_LIBRARY}) + +message("TensorRT_LIBRARIES: ${TensorRT_LIBRARIES}") + + set(TENSORRT_LIBS ${OpenCV_LIBS} #${CUDA_LIBRARIES} @@ -74,13 +78,14 @@ set(TENSORRT_LIBS ${CUDA_curand_LIBRARY} ${CUDNN_LIBRARY} # ${LIB_PTHREAD} - ${TensorRT_LIBRARIES} -) + ${TensorRT_LIBRARIES}) if (CMAKE_COMPILER_IS_GNUCXX) - set(TENSORRT_LIBS ${TENSORRT_LIBS} stdc++fs nvinfer_plugin nvonnxparser) + set(TENSORRT_LIBS ${TENSORRT_LIBS} stdc++fs) endif(CMAKE_COMPILER_IS_GNUCXX) +message("TENSORRT_LIBS: ${TENSORRT_LIBS}") + target_link_libraries(${libname_rt} ${TENSORRT_LIBS}) install(TARGETS ${libname_rt} @@ -90,4 +95,4 @@ install(TARGETS ${libname_rt} RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) -set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") \ No newline at end of file +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/src/Detector/tensorrt_yolo/YoloONNX.cpp b/src/Detector/tensorrt_yolo/YoloONNX.cpp index b016c4b3..3ea99ec4 100644 --- a/src/Detector/tensorrt_yolo/YoloONNX.cpp +++ b/src/Detector/tensorrt_yolo/YoloONNX.cpp @@ -1,5 +1,7 @@ #include +#define DEFINE_TRT_ENTRYPOINTS 1 + #include "YoloONNX.hpp" #include "trt_utils.h" #include 
"../../common/defines.h" @@ -22,14 +24,13 @@ bool YoloONNX::Init(const SampleYoloParams& params) auto GetBindings = [&]() { - auto numBindings = m_engine->getNbBindings(); + auto numBindings = m_engine->getNbIOTensors(); std::cout << "** Bindings: " << numBindings << " **" << std::endl; for (int32_t i = 0; i < numBindings; ++i) { - nvinfer1::Dims dim = m_engine->getBindingDimensions(i); - - std::string bindName = m_engine->getBindingName(i); + std::string bindName = m_engine->getIOTensorName(i); + nvinfer1::Dims dim = m_engine->getTensorShape(bindName.c_str()); for (const auto& outName : m_params.outputTensorNames) { if (bindName == outName) @@ -77,27 +78,17 @@ bool YoloONNX::Init(const SampleYoloParams& params) delete infer; #endif - sample::gLogInfo << "TRT Engine loaded from: " << m_params.engineFileName << std::endl; - - GetBindings(); - - if (!m_engine) + if (m_engine) { - res = false; + GetBindings(); + m_inputDims = m_engine->getTensorShape(m_engine->getIOTensorName(0)); + res = true; } else { -#if 1 - m_inputDims = m_engine->getBindingDimensions(0); -#else - m_inputDims.nbDims = 4; - m_inputDims.d[0] = m_params.explicitBatchSize; - m_inputDims.d[1] = 3; - m_inputDims.d[2] = m_params.width; - m_inputDims.d[3] = m_params.height; -#endif res = true; } + sample::gLogInfo << "TRT Engine loaded from: " << m_params.engineFileName << " with res = " << res << std::endl; } else { @@ -175,9 +166,9 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, size_t dlaManagedSRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM); size_t dlaLocalDRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM); size_t dlaGlobalDRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM); - std::cout << "workspaceSize = " << workspaceSize << ", dlaManagedSRAMSize = " << dlaManagedSRAMSize << ", dlaLocalDRAMSize = " << dlaLocalDRAMSize << ", dlaGlobalDRAMSize = " << dlaGlobalDRAMSize << std::endl; + std::cout << "m_params.videoMemory = " << m_params.videoMemory << ", workspaceSize = " << workspaceSize << ", dlaManagedSRAMSize = " << dlaManagedSRAMSize << ", dlaLocalDRAMSize = " << dlaLocalDRAMSize << ", dlaGlobalDRAMSize = " << dlaGlobalDRAMSize << std::endl; - config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, m_params.videoMemory ? m_params.videoMemory : 4096_MiB); + config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, m_params.videoMemory ? 
m_params.videoMemory : workspaceSize); #endif config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK); diff --git a/src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp new file mode 100644 index 00000000..9103bfa6 --- /dev/null +++ b/src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp @@ -0,0 +1,111 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv11_bb_onnx class +/// +class YOLOv11_bb_onnx : public YoloONNX +{ +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x84x8400 + + const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); + const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = m_outpuDims[0].d[ncInd] - 4; + int dimensions = nc + 4; + size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (objectConf >= m_params.confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2); + float y = fh * (output[k + 1] - output[k + 3] / 2); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + + 
return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp new file mode 100644 index 00000000..54fc6b01 --- /dev/null +++ b/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp @@ -0,0 +1,301 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv11_instance_onnx class +/// +class YOLOv11_instance_onnx : public YoloONNX +{ +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); + const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); + + size_t outInd = (outputs.size() == 0) ? 1 : 0; + size_t segInd = (outputs.size() == 0) ? 0 : 1; + + auto output = outputs[0]; + + //std::cout << "output[1] mem:\n"; + //auto output1 = outputs[1]; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output1[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x116x8400 + //2: name: output1, size: 1x32x160x160 + // 25200 = 3x80x80 + 3x40x40 + 3x20x20 + // 116 = x, y, w, h, 80 classes, 32 seg ancors + // 80 * 8 = 640, 40 * 16 = 640, 20 * 32 = 640 + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = m_outpuDims[outInd].d[ncInd] - 4 - 32; + int dimensions = nc + 32 + 4; + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]) / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[outInd].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + +#if 1 + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + //std::cout << "output1 nbDims: " << m_outpuDims[segInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[segInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[segInd].d[i]; + // if (i + 1 != m_outpuDims[segInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + //std::cout << "output nbDims: " << m_outpuDims[outInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[outInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[outInd].d[i]; + // if (i + 1 != m_outpuDims[outInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + + segChannels = m_outpuDims[segInd].d[1]; + segWidth = m_outpuDims[segInd].d[2]; + segHeight = m_outpuDims[segInd].d[3]; + } + cv::Mat maskProposals; + std::vector> picked_proposals; + int net_width = nc + 4 + segChannels; +#endif + + std::vector 
classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4 + 32); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // std::cout << "without nms: mem" << i << ": "; + // for (size_t ii = 0; ii < 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = 4; ii < nc + 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = nc + 4; ii < nc + 4 + 32; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + //} + + if (objectConf >= m_params.confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2); + float y = fh * (output[k + 1] - output[k + 3] / 2); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + + //auto ClampToFrame = [](float& v, float& size, int hi) -> int + //{ + // int res = 0; +// + // if (size < 1) + // size = 0; +// + // if (v < 0) + // { + // res = v; + // v = 0; + // return res; + // } + // else if (v + size > hi - 1) + // { + // res = v; + // v = hi - 1 - size; + // if (v < 0) + // { + // size += v; + // v = 0; + // } + // res -= v; + // return res; + // } + // return res; + //}; + //ClampToFrame(x, width, frameSize.width); + //ClampToFrame(y, height, frameSize.height); + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (width > 4 && height > 4) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + + std::vector temp_proto(output + k + 4 + nc, output + k + net_width); + picked_proposals.push_back(temp_proto); + } + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], Clamp(rectBoxes[indices[bi]], frameSize)); + maskProposals.push_back(cv::Mat(picked_proposals[indices[bi]]).t()); + } + + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[1]; + std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); + + int INPUT_W = m_inputDims.d[3]; + int INPUT_H = m_inputDims.d[2]; + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + 
int padh = 0; + cv::Rect roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, frameSize, cv::INTER_NEAREST); + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + + std::vector> contours; + std::vector hierarchy; +#if (CV_VERSION_MAJOR < 4) + cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + cv::RotatedRect rr = (contour.size() < 5) ? cv::minAreaRect(contour) : cv::fitEllipse(contour); + + br.x += resBoxes[i].m_brect.x; + br.y += resBoxes[i].m_brect.y; + rr.center.x += resBoxes[i].m_brect.x; + rr.center.y += resBoxes[i].m_brect.y; + + //std::cout << "rr: " << rr.center << ", " << rr.angle << ", " << rr.size << std::endl; + + if (resBoxes[i].m_boxMask.size() != br.size()) + { + br.width = resBoxes[i].m_boxMask.cols; + br.height = resBoxes[i].m_boxMask.rows; + if (br.x + br.width >= frameSize.width) + br.x = frameSize.width - br.width; + if (br.y + br.height >= frameSize.height) + br.y = frameSize.height - br.height; + } + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + break; + } + } + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp new file mode 100644 index 00000000..7c2b98ce --- /dev/null +++ b/src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp @@ -0,0 +1,124 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv11_obb_onnx class +/// +class YOLOv11_obb_onnx : public YoloONNX +{ +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x20x21504 + //20: 15 DOTA classes + x + y + w + h + a + constexpr int shapeDataSize = 5; + + const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); + const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = m_outpuDims[0].d[ncInd] - shapeDataSize; + int dimensions = nc + shapeDataSize; + size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 
0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + shapeDataSize); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // for (int jj = 0; jj < 20; ++jj) + // { + // std::cout << output[jj] << " "; + // } + // std::cout << std::endl; + //} + + if (objectConf >= m_params.confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) + float cx = fw * output[k]; + float cy = fh * output[k + 1]; + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + float angle = 180.f * output[k + nc + shapeDataSize - 1] / M_PI; + rectBoxes.emplace_back(cv::Point2f(cx, cy), cv::Size2f(width, height), angle); + + //if (rectBoxes.size() == 1) + // std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << rectBoxes.back().boundingRect() << ", angle = " << angle << std::endl; + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + //std::vector indices; + //cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + //resBoxes.reserve(indices.size()); + + resBoxes.reserve(rectBoxes.size()); + for (size_t bi = 0; bi < rectBoxes.size(); ++bi) + { + resBoxes.emplace_back(classIds[bi], confidences[bi], rectBoxes[bi]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_yolo/class_detector.cpp b/src/Detector/tensorrt_yolo/class_detector.cpp index f7a18e23..895e5d96 100644 --- a/src/Detector/tensorrt_yolo/class_detector.cpp +++ b/src/Detector/tensorrt_yolo/class_detector.cpp @@ -10,6 +10,10 @@ #include "YoloONNXv8_instance.hpp" #include "YoloONNXv9_bb.hpp" #include "YoloONNXv10_bb.hpp" +#include "YoloONNXv11_bb.hpp" +#include "YoloONNXv11_obb.hpp" +#include "YoloONNXv11_instance.hpp" + namespace tensor_rt { @@ -110,6 +114,22 @@ namespace tensor_rt m_params.outputTensorNames.push_back("output0"); m_detector = std::make_unique(); break; + case ModelType::YOLOV11: + m_params.inputTensorNames.push_back("images"); + m_params.outputTensorNames.push_back("output0"); + m_detector = std::make_unique(); + break; + case ModelType::YOLOV11_OBB: + m_params.inputTensorNames.push_back("images"); + m_params.outputTensorNames.push_back("output0"); + m_detector = std::make_unique(); + break; + case ModelType::YOLOV11Mask: + m_params.inputTensorNames.push_back("images"); + m_params.outputTensorNames.push_back("output0"); + m_params.outputTensorNames.push_back("output1"); + m_detector = std::make_unique(); + break; } // Threshold values @@ -193,7 +213,8 @@ namespace tensor_rt if (config.net_type == ModelType::YOLOV6 || config.net_type == ModelType::YOLOV7 || config.net_type == ModelType::YOLOV7Mask || config.net_type == ModelType::YOLOV8 || config.net_type == ModelType::YOLOV8_OBB || config.net_type == ModelType::YOLOV8Mask || - config.net_type == ModelType::YOLOV9 || config.net_type == ModelType::YOLOV10) + config.net_type == ModelType::YOLOV9 || config.net_type == ModelType::YOLOV10 || + config.net_type == 
ModelType::YOLOV11 || config.net_type == ModelType::YOLOV11_OBB || config.net_type == ModelType::YOLOV11Mask) m_impl = new YoloONNXImpl(); else m_impl = new YoloDectectorImpl(); diff --git a/src/Detector/tensorrt_yolo/class_detector.h b/src/Detector/tensorrt_yolo/class_detector.h index 1dd85d70..b4da0d0a 100644 --- a/src/Detector/tensorrt_yolo/class_detector.h +++ b/src/Detector/tensorrt_yolo/class_detector.h @@ -54,7 +54,10 @@ namespace tensor_rt YOLOV8_OBB, YOLOV8Mask, YOLOV9, - YOLOV10 + YOLOV10, + YOLOV11, + YOLOV11_OBB, + YOLOV11Mask }; /// diff --git a/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake b/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake index 7f07dd36..f4f9f42c 100644 --- a/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake +++ b/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake @@ -1,39 +1,51 @@ +# ~~~ +# Copyright 2021 Olivier Le Doeuff +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # This module defines the following variables: # -# :: +# - TensorRT_FOUND: A boolean specifying whether or not TensorRT was found. +# - TensorRT_VERSION: The exact version of TensorRT found +# - TensorRT_VERSION_MAJOR: The major version of TensorRT. +# - TensorRT_VERSION_MINOR: The minor version of TensorRT. +# - TensorRT_VERSION_PATCH: The patch version of TensorRT. +# - TensorRT_VERSION_TWEAK: The tweak version of TensorRT. +# - TensorRT_INCLUDE_DIRS: The path to TensorRT ``include`` folder containing the header files required to compile a project linking against TensorRT. +# - TensorRT_LIBRARY_DIRS: The path to TensorRT library directory that contains libraries. # -# TensorRT_INCLUDE_DIRS -# TensorRT_LIBRARIES -# TensorRT_FOUND -# -# :: -# -# TensorRT_VERSION_STRING - version (x.y.z) -# TensorRT_VERSION_MAJOR - major version (x) -# TensorRT_VERSION_MINOR - minor version (y) -# TensorRT_VERSION_PATCH - patch version (z) +# This module create following targets: +# - trt::nvinfer +# - trt::nvinfer_plugin +# - trt::nvonnxparser +# - trt::nvparsers +# This script was inspired from https://github.com/NicolasIRAGNE/CMakeScripts +# This script was inspired from https://github.com/NVIDIA/tensorrt-laboratory/blob/master/cmake/FindTensorRT.cmake # # Hints # ^^^^^ # A user may set ``TensorRT_ROOT`` to an installation root to tell this module where to look. 
-# -set(_TensorRT_SEARCHES) +# ~~~ -if(TensorRT_ROOT) - set(_TensorRT_SEARCH_ROOT PATHS ${TensorRT_ROOT} NO_DEFAULT_PATH) - list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_ROOT) +if(NOT TensorRT_FIND_COMPONENTS) + set(TensorRT_FIND_COMPONENTS nvinfer nvinfer_plugin nvonnxparser) endif() +set(TensorRT_LIBRARIES) -# appends some common paths -set(_TensorRT_SEARCH_NORMAL - PATHS "/usr" +# find the include directory of TensorRT +find_path( + TensorRT_INCLUDE_DIR + NAMES NvInfer.h + PATHS ${TensorRT_ROOT} ENV TensorRT_ROOT + PATH_SUFFIXES include ) -list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_NORMAL) -# Include dir -foreach(search ${_TensorRT_SEARCHES}) - find_path(TensorRT_INCLUDE_DIR NAMES NvInfer.h ${${search}} PATH_SUFFIXES include) -endforeach() +string(FIND ${TensorRT_INCLUDE_DIR} "NOTFOUND" _include_dir_notfound) +if(NOT _include_dir_notfound EQUAL -1) + if(TensorRT_FIND_REQUIRED) + message(FATAL_ERROR "Fail to find TensorRT, please set TensorRT_ROOT. Include path not found.") + endif() + return() if(NOT TensorRT_LIBRARY) foreach(search ${_TensorRT_SEARCHES}) @@ -42,34 +54,71 @@ if(NOT TensorRT_LIBRARY) find_library(TRT_NVINFER_PLUGIN NAMES nvinfer_plugin ${${search}} PATH_SUFFIXES lib lib64 lib/x64) endforeach() list(APPEND TensorRT_LIBRARY ${TRT_NVINFER} ${TRT_NVINFER_PLUGIN} ${TRT_NVONNX_PARSER}) -endif() -mark_as_advanced(TensorRT_INCLUDE_DIR) +endif() +set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR}) -if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") +# Extract version of tensorrt +if(EXISTS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_TWEAK REGEX "^#define NV_TENSORRT_BUILD [0-9]+.*$") - string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") - string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") - string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") - set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") + string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") + string(REGEX REPLACE "^#define NV_TENSORRT_BUILD ([0-9]+).*$" "\\1" TensorRT_VERSION_TWEAK "${TensorRT_TWEAK}") + set(TensorRT_VERSION "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}.${TensorRT_VERSION_TWEAK}") endif() -include(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(TensorRT REQUIRED_VARS 
TensorRT_LIBRARY TensorRT_INCLUDE_DIR VERSION_VAR TensorRT_VERSION_STRING) +function(_find_trt_component component) + + # Find library for component (ie nvinfer, nvparsers, etc...) + find_library( + TensorRT_${component}_LIBRARY + NAMES ${component} + PATHS ${TensorRT_ROOT} ${TENSORRT_LIBRARY_DIR} ENV TensorRT_ROOT + ) -if(TensorRT_FOUND) - set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR}) + string(FIND ${TensorRT_${component}_LIBRARY} "NOTFOUND" _library_not_found) - if(NOT TensorRT_LIBRARIES) - set(TensorRT_LIBRARIES ${TensorRT_LIBRARY}) + if(NOT TensorRT_LIBRARY_DIR) + get_filename_component(_path ${TensorRT_${component}_LIBRARY} DIRECTORY) + set(TensorRT_LIBRARY_DIR + "${_path}" + CACHE INTERNAL "TensorRT_LIBRARY_DIR" + ) endif() - if(NOT TARGET TensorRT::TensorRT) - add_library(TensorRT::TensorRT UNKNOWN IMPORTED) - set_target_properties(TensorRT::TensorRT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}") - set_property(TARGET TensorRT::TensorRT APPEND PROPERTY IMPORTED_LOCATION "${TensorRT_LIBRARY}") + if(NOT TensorRT_LIBRARY_DIRS) + get_filename_component(_path ${TensorRT_${component}_LIBRARY} DIRECTORY) + set(TensorRT_LIBRARY_DIRS + "${_path}" + CACHE INTERNAL "TensorRT_LIBRARY_DIRS" + ) endif() -endif() + + # Library found, and doesn't already exists + if(_library_not_found EQUAL -1 AND NOT TARGET trt::${component}) + set(TensorRT_${component}_FOUND + TRUE + CACHE INTERNAL "Found ${component}" + ) + + # Create a target + add_library(trt::${component} IMPORTED INTERFACE) + target_include_directories(trt::${component} SYSTEM INTERFACE "${TensorRT_INCLUDE_DIRS}") + target_link_libraries(trt::${component} INTERFACE "${TensorRT_${component}_LIBRARY}") + set(TensorRT_LIBRARIES ${TensorRT_LIBRARIES} ${TensorRT_${component}_LIBRARY}) + endif() + +endfunction() + +# Find each components +foreach(component IN LISTS TensorRT_FIND_COMPONENTS) + _find_trt_component(${component}) +endforeach() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(TensorRT HANDLE_COMPONENTS VERSION_VAR TensorRT_VERSION REQUIRED_VARS TensorRT_INCLUDE_DIR) diff --git a/src/Detector/tensorrt_yolo/common/BatchStream.h b/src/Detector/tensorrt_yolo/common/BatchStream.h index a8da9923..c4ab9de0 100644 --- a/src/Detector/tensorrt_yolo/common/BatchStream.h +++ b/src/Detector/tensorrt_yolo/common/BatchStream.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -119,7 +120,7 @@ class MNISTBatchStream : public IBatchStream file.read(reinterpret_cast(rawData.data()), numElements * sizeof(uint8_t)); mData.resize(numElements); std::transform( - rawData.begin(), rawData.end(), mData.begin(), [](uint8_t val) { return static_cast(val) / 255.f; }); + rawData.begin(), rawData.end(), mData.begin(), [](uint8_t val) { return static_cast(val) / 255.F; }); } void readLabelsFile(const std::string& labelsFilePath) @@ -152,42 +153,39 @@ class MNISTBatchStream : public IBatchStream class BatchStream : public IBatchStream { public: - BatchStream( - int batchSize, int maxBatches, std::string prefix, std::string suffix, std::vector directories) + BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::string const& suffix, + std::vector const& directories) : mBatchSize(batchSize) , mMaxBatches(maxBatches) , mPrefix(prefix) , mSuffix(suffix) , mDataDir(directories) { - FILE* file = fopen(locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(), "rb"); - ASSERT(file != nullptr); + std::ifstream file(locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(), std::ios::binary); + ASSERT(file.good()); int d[4]; - size_t readSize = fread(d, sizeof(int), 4, file); - ASSERT(readSize == 4); + file.read(reinterpret_cast(d), 4 * sizeof(int32_t)); mDims.nbDims = 4; // The number of dimensions. mDims.d[0] = d[0]; // Batch Size mDims.d[1] = d[1]; // Channels mDims.d[2] = d[2]; // Height mDims.d[3] = d[3]; // Width ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && mDims.d[3] > 0); - fclose(file); mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; mBatch.resize(mBatchSize * mImageSize, 0); mLabels.resize(mBatchSize, 0); mFileBatch.resize(mDims.d[0] * mImageSize, 0); mFileLabels.resize(mDims.d[0], 0); - reset(0); } - BatchStream(int batchSize, int maxBatches, std::string prefix, std::vector directories) + BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::vector const& directories) : BatchStream(batchSize, maxBatches, prefix, ".batch", directories) { } - BatchStream( - int batchSize, int maxBatches, nvinfer1::Dims dims, std::string listFile, std::vector directories) + BatchStream(int batchSize, int maxBatches, nvinfer1::Dims const& dims, std::string const& listFile, + std::vector const& directories) : mBatchSize(batchSize) , mMaxBatches(maxBatches) , mDims(dims) @@ -199,7 +197,6 @@ class BatchStream : public IBatchStream mLabels.resize(mBatchSize, 0); mFileBatch.resize(mDims.d[0] * mImageSize, 0); mFileLabels.resize(mDims.d[0], 0); - reset(0); } // Resets data members @@ -219,7 +216,7 @@ class BatchStream : public IBatchStream return false; } - for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) + for (int64_t csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) { ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]); if (mFileBatchPos == mDims.d[0] && !update()) @@ -228,7 +225,7 @@ class BatchStream : public IBatchStream } // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer. 
- csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos); + csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos); std::copy_n( getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); std::copy_n(getFileLabels() + mFileBatchPos, csize, getLabels() + batchPos); @@ -295,22 +292,16 @@ class BatchStream : public IBatchStream if (mListFile.empty()) { std::string inputFileName = locateFile(mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir); - FILE* file = fopen(inputFileName.c_str(), "rb"); + std::ifstream file(inputFileName.c_str(), std::ios::binary); if (!file) { return false; } - int d[4]; - size_t readSize = fread(d, sizeof(int), 4, file); - ASSERT(readSize == 4); + file.read(reinterpret_cast(d), 4 * sizeof(int32_t)); ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && mDims.d[3] == d[3]); - size_t readInputCount = fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file); - ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize)); - size_t readLabelCount = fread(getFileLabels(), sizeof(float), mDims.d[0], file); - ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0])); - - fclose(file); + file.read(reinterpret_cast(getFileBatch()), sizeof(float) * mDims.d[0] * mImageSize); + file.read(reinterpret_cast(getFileLabels()), sizeof(float) * mDims.d[0]); } else { @@ -368,7 +359,7 @@ class BatchStream : public IBatchStream return true; } - int mBatchSize{0}; + int64_t mBatchSize{0}; int mMaxBatches{0}; int mBatchCount{0}; int mFileCount{0}; diff --git a/src/Detector/tensorrt_yolo/common/EntropyCalibrator.h b/src/Detector/tensorrt_yolo/common/EntropyCalibrator.h index f31789bf..67a0130e 100644 --- a/src/Detector/tensorrt_yolo/common/EntropyCalibrator.h +++ b/src/Detector/tensorrt_yolo/common/EntropyCalibrator.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -28,8 +29,8 @@ template class EntropyCalibratorImpl { public: - EntropyCalibratorImpl( - TBatchStream stream, int firstBatch, std::string networkName, const char* inputBlobName, bool readCache = true) + EntropyCalibratorImpl(TBatchStream const& stream, int firstBatch, std::string const& networkName, + const char* inputBlobName, bool readCache = true) : mStream{stream} , mCalibrationTableName("CalibrationTable" + networkName) , mInputBlobName(inputBlobName) @@ -51,11 +52,12 @@ class EntropyCalibratorImpl return mStream.getBatchSize(); } - bool getBatch(void* bindings[], const char* names[], int /*nbBindings*/) noexcept + bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept { if (!mStream.next()) + { return false; - + } CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); ASSERT(!strcmp(names[0], mInputBlobName)); bindings[0] = mDeviceInput; @@ -101,8 +103,8 @@ template class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { public: - Int8EntropyCalibrator2( - TBatchStream stream, int firstBatch, const char* networkName, const char* inputBlobName, bool readCache = true) + Int8EntropyCalibrator2(TBatchStream const& stream, int32_t firstBatch, const char* networkName, + const char* inputBlobName, bool readCache = true) : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) { } diff --git a/src/Detector/tensorrt_yolo/common/ErrorRecorder.h b/src/Detector/tensorrt_yolo/common/ErrorRecorder.h index 40b35fb5..bfb857c5 100644 --- a/src/Detector/tensorrt_yolo/common/ErrorRecorder.h +++ b/src/Detector/tensorrt_yolo/common/ErrorRecorder.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -16,7 +17,7 @@ #ifndef ERROR_RECORDER_H #define ERROR_RECORDER_H -#include "NvInferRuntimeCommon.h" +#include "NvInferRuntime.h" #include "logger.h" #include #include @@ -44,7 +45,7 @@ class SampleErrorRecorder : public IErrorRecorder public: SampleErrorRecorder() = default; - virtual ~SampleErrorRecorder() noexcept {} + ~SampleErrorRecorder() noexcept override {} int32_t getNbErrors() const noexcept final { return mErrorStack.size(); diff --git a/src/Detector/tensorrt_yolo/common/argsParser.h b/src/Detector/tensorrt_yolo/common/argsParser.h new file mode 100644 index 00000000..1f0b9025 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/argsParser.h @@ -0,0 +1,162 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef TENSORRT_ARGS_PARSER_H +#define TENSORRT_ARGS_PARSER_H + +#ifdef _MSC_VER +#include "getOptWin.h" +#else +#include +#endif +#include +#include +#include + +namespace samplesCommon +{ + +//! +//! \brief The SampleParams structure groups the basic parameters required by +//! all sample networks. +//! +struct SampleParams +{ + int32_t batchSize{1}; //!< Number of inputs in a batch + int32_t dlaCore{-1}; //!< Specify the DLA core to run network on. + bool int8{false}; //!< Allow runnning the network in Int8 mode. + bool fp16{false}; //!< Allow running the network in FP16 mode. + bool bf16{false}; //!< Allow running the network in BF16 mode. + std::vector dataDirs; //!< Directory paths where sample data files are stored + std::vector inputTensorNames; + std::vector outputTensorNames; + std::string timingCacheFile; //!< Path to timing cache file +}; + +//! +//! \brief The OnnxSampleParams structure groups the additional parameters required by +//! networks that use ONNX +//! +struct OnnxSampleParams : public SampleParams +{ + std::string onnxFileName; //!< Filename of ONNX file of a network +}; + +//! +//! /brief Struct to maintain command-line arguments. +//! +struct Args +{ + bool runInInt8{false}; + bool runInFp16{false}; + bool runInBf16{false}; + bool help{false}; + int32_t useDLACore{-1}; + int32_t batch{1}; + std::vector dataDirs; + std::string saveEngine; + std::string loadEngine; + bool rowOrder{true}; + std::string timingCacheFile; +}; + +//! +//! \brief Populates the Args struct with the provided command-line parameters. +//! +//! \throw invalid_argument if any of the arguments are not valid +//! +//! \return boolean If return value is true, execution can continue, otherwise program should exit +//! 
+inline bool parseArgs(Args& args, int32_t argc, char* argv[]) +{ + while (1) + { + int32_t arg; + static struct option long_options[] + = {{"help", no_argument, 0, 'h'}, {"datadir", required_argument, 0, 'd'}, {"int8", no_argument, 0, 'i'}, + {"fp16", no_argument, 0, 'f'}, {"bf16", no_argument, 0, 'z'}, {"columnOrder", no_argument, 0, 'c'}, + {"saveEngine", required_argument, 0, 's'}, {"loadEngine", required_argument, 0, 'o'}, + {"useDLACore", required_argument, 0, 'u'}, {"batch", required_argument, 0, 'b'}, + {"timingCacheFile", required_argument, 0, 't'}, {nullptr, 0, nullptr, 0}}; + int32_t option_index = 0; + arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index); + if (arg == -1) + { + break; + } + + switch (arg) + { + case 'h': args.help = true; return true; + case 'd': + if (optarg) + { + args.dataDirs.push_back(optarg); + } + else + { + std::cerr << "ERROR: --datadir requires option argument" << std::endl; + return false; + } + break; + case 's': + if (optarg) + { + args.saveEngine = optarg; + } + break; + case 'o': + if (optarg) + { + args.loadEngine = optarg; + } + break; + case 'i': args.runInInt8 = true; break; + case 'f': args.runInFp16 = true; break; + case 'z': args.runInBf16 = true; break; + case 'c': args.rowOrder = false; break; + case 'u': + if (optarg) + { + args.useDLACore = std::stoi(optarg); + } + break; + case 'b': + if (optarg) + { + args.batch = std::stoi(optarg); + } + break; + case 't': + if (optarg) + { + args.timingCacheFile = optarg; + } + else + { + std::cerr << "ERROR: --timingCacheFile requires option argument" << std::endl; + return false; + } + break; + default: return false; + } + } + return true; +} + +} // namespace samplesCommon + +#endif // TENSORRT_ARGS_PARSER_H diff --git a/src/Detector/tensorrt_yolo/common/bfloat16.cpp b/src/Detector/tensorrt_yolo/common/bfloat16.cpp new file mode 100644 index 00000000..8222826a --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/bfloat16.cpp @@ -0,0 +1,60 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "bfloat16.h" +#include + +namespace sample +{ + +BFloat16::operator float() const +{ + static_assert(sizeof(uint32_t) == sizeof(float), ""); + float val{0.F}; + auto bits = static_cast(mRep) << 16; + std::memcpy(&val, &bits, sizeof(uint32_t)); + return val; +} + +BFloat16::BFloat16(float x) +{ + static_assert(sizeof(uint32_t) == sizeof(float), ""); + uint32_t bits{0}; + std::memcpy(&bits, &x, sizeof(float)); + + // FP32 format: 1 sign bit, 8 bit exponent, 23 bit mantissa + // BF16 format: 1 sign bit, 8 bit exponent, 7 bit mantissa + + // Mask for exponent + constexpr uint32_t exponent = 0xFFU << 23; + + // Check if exponent is all 1s (NaN or infinite) + if ((bits & exponent) != exponent) + { + // x is finite - round to even + bits += 0x7FFFU + (bits >> 16 & 1); + } + + mRep = static_cast(bits >> 16); +} + +BFloat16 operator+(BFloat16 x, BFloat16 y) +{ + return BFloat16(static_cast(x) + static_cast(y)); +} + +} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/bfloat16.h b/src/Detector/tensorrt_yolo/common/bfloat16.h new file mode 100644 index 00000000..0d0ab922 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/bfloat16.h @@ -0,0 +1,46 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace sample +{ + +//! Implements "Brain Floating Point": like an IEEE FP32, +//! but the significand is only 7 bits instead of 23 bits. +class BFloat16 +{ +public: + BFloat16() + : mRep(0) + { + } + + // Rounds to even if there is a tie. + BFloat16(float x); + + operator float() const; + +private: + //! Value stored in BFloat16 representation. + uint16_t mRep; +}; +BFloat16 operator+(BFloat16 x, BFloat16 y); + +} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/buffers.h b/src/Detector/tensorrt_yolo/common/buffers.h index ef673b2b..e58f2f5c 100644 --- a/src/Detector/tensorrt_yolo/common/buffers.h +++ b/src/Detector/tensorrt_yolo/common/buffers.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -238,28 +239,53 @@ class BufferManager public: static const size_t kINVALID_SIZE_VALUE = ~size_t(0); + //! + //! \brief Create a BufferManager for handling buffer interactions with engine, when the I/O tensor volumes + //! are provided + //! 
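//! A typical end-to-end flow with this class (illustrative sketch; the engine/context objects
//! and the tensor names "images"/"output0" are assumptions, not defined by this header):
//!
//!   samplesCommon::BufferManager buffers(engine);
//!   auto* hostIn = static_cast<float*>(buffers.getHostBuffer("images"));
//!   // ... fill hostIn with pre-processed input ...
//!   buffers.copyInputToDevice();
//!   bool const ok = context->executeV2(buffers.getDeviceBindings().data());
//!   buffers.copyOutputToHost();
//!   auto const* hostOut = static_cast<float const*>(buffers.getHostBuffer("output0"));
//!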
+ BufferManager( + std::shared_ptr engine, std::vector const& volumes, int32_t batchSize = 0) + : mEngine(engine) + , mBatchSize(batchSize) + { + // Create host and device buffers + for (int32_t i = 0; i < mEngine->getNbIOTensors(); i++) + { + auto const name = engine->getIOTensorName(i); + mNames[name] = i; + + nvinfer1::DataType type = mEngine->getTensorDataType(name); + + std::unique_ptr manBuf{new ManagedBuffer()}; + manBuf->deviceBuffer = DeviceBuffer(volumes[i], type); + manBuf->hostBuffer = HostBuffer(volumes[i], type); + void* deviceBuffer = manBuf->deviceBuffer.data(); + mDeviceBindings.emplace_back(deviceBuffer); + mManagedBuffers.emplace_back(std::move(manBuf)); + } + } + //! //! \brief Create a BufferManager for handling buffer interactions with engine. //! - BufferManager(std::shared_ptr engine, const int batchSize, - const nvinfer1::IExecutionContext* context = nullptr) + BufferManager(std::shared_ptr engine, int32_t const batchSize = 0, + nvinfer1::IExecutionContext const* context = nullptr) : mEngine(engine) , mBatchSize(batchSize) { - // Full Dims implies no batch size. - auto impbs = engine->hasImplicitBatchDimension(); - std::cout << "hasImplicitBatchDimension: " << impbs << ", mBatchSize = " << mBatchSize << std::endl; - assert(engine->hasImplicitBatchDimension() || mBatchSize == 0); // Create host and device buffers - for (int i = 0; i < mEngine->getNbBindings(); i++) + for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++) { - auto dims = context ? context->getBindingDimensions(i) : mEngine->getBindingDimensions(i); + auto const name = engine->getIOTensorName(i); + mNames[name] = i; + + auto dims = context ? context->getTensorShape(name) : mEngine->getTensorShape(name); size_t vol = context || !mBatchSize ? 1 : static_cast(mBatchSize); - nvinfer1::DataType type = mEngine->getBindingDataType(i); - int vecDim = mEngine->getBindingVectorizedDim(i); + nvinfer1::DataType type = mEngine->getTensorDataType(name); + int32_t vecDim = mEngine->getTensorVectorizedDim(name); if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector { - int scalarsPerVec = mEngine->getBindingComponentsPerElement(i); + int32_t scalarsPerVec = mEngine->getTensorComponentsPerElement(name); dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec); vol *= scalarsPerVec; } @@ -267,7 +293,8 @@ class BufferManager std::unique_ptr manBuf{new ManagedBuffer()}; manBuf->deviceBuffer = DeviceBuffer(vol, type); manBuf->hostBuffer = HostBuffer(vol, type); - mDeviceBindings.emplace_back(manBuf->deviceBuffer.data()); + void* deviceBuffer = manBuf->deviceBuffer.data(); + mDeviceBindings.emplace_back(deviceBuffer); mManagedBuffers.emplace_back(std::move(manBuf)); } } @@ -284,7 +311,7 @@ class BufferManager //! //! \brief Returns a vector of device buffers. //! - const std::vector& getDeviceBindings() const + std::vector const& getDeviceBindings() const { return mDeviceBindings; } @@ -293,7 +320,7 @@ class BufferManager //! \brief Returns the device buffer corresponding to tensorName. //! Returns nullptr if no such tensor can be found. //! - void* getDeviceBuffer(const std::string& tensorName) const + void* getDeviceBuffer(std::string const& tensorName) const { return getBuffer(false, tensorName); } @@ -302,72 +329,21 @@ class BufferManager //! \brief Returns the host buffer corresponding to tensorName. //! Returns nullptr if no such tensor can be found. //! - void* getHostBuffer(const std::string& tensorName) const + void* getHostBuffer(std::string const& tensorName) const { return getBuffer(true, tensorName); } - //! 
- //! \brief Returns the host buffer corresponding to tensorName. - //! Returns nullptr if no such tensor can be found. - //! - void* getHostBuffer(int bindingIndex) const - { - return getBuffer(true, bindingIndex); - } - //! //! \brief Returns the size of the host and device buffers that correspond to tensorName. //! Returns kINVALID_SIZE_VALUE if no such tensor can be found. //! - size_t size(const std::string& tensorName) const + size_t size(std::string const& tensorName) const { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) + auto record = mNames.find(tensorName); + if (record == mNames.end()) return kINVALID_SIZE_VALUE; - return mManagedBuffers[index]->hostBuffer.nbBytes(); - } - - //! - //! \brief Dump host buffer with specified tensorName to ostream. - //! Prints error message to std::ostream if no such tensor can be found. - //! - void dumpBuffer(std::ostream& os, const std::string& tensorName) - { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) - { - os << "Invalid tensor name" << std::endl; - return; - } - void* buf = mManagedBuffers[index]->hostBuffer.data(); - size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes(); - nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index); - size_t rowCount = static_cast(bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize); - int leadDim = mBatchSize; - int* trailDims = bufDims.d; - int nbDims = bufDims.nbDims; - - // Fix explicit Dimension networks - if (!leadDim && nbDims > 0) - { - leadDim = bufDims.d[0]; - ++trailDims; - --nbDims; - } - - os << "[" << leadDim; - for (int i = 0; i < nbDims; i++) - os << ", " << trailDims[i]; - os << "]" << std::endl; - switch (mEngine->getBindingDataType(index)) - { - case nvinfer1::DataType::kINT32: print(os, buf, bufSize, rowCount); break; - case nvinfer1::DataType::kFLOAT: print(os, buf, bufSize, rowCount); break; - case nvinfer1::DataType::kHALF: print(os, buf, bufSize, rowCount); break; - case nvinfer1::DataType::kINT8: assert(0 && "Int8 network-level input and output is not supported"); break; - case nvinfer1::DataType::kBOOL: assert(0 && "Bool network-level input and output are not supported"); break; - } + return mManagedBuffers[record->second]->hostBuffer.nbBytes(); } //! @@ -382,7 +358,7 @@ class BufferManager assert(bufSize % sizeof(T) == 0); T* typedBuf = static_cast(buf); size_t numItems = bufSize / sizeof(T); - for (int i = 0; i < static_cast(numItems); i++) + for (int32_t i = 0; i < static_cast(numItems); i++) { // Handle rowCount == 1 case if (rowCount == 1 && i != static_cast(numItems) - 1) @@ -404,7 +380,7 @@ class BufferManager //! void copyInputToDevice() { - memcpyBuffers(true, false, false, 0); + memcpyBuffers(true, false, false); } //! @@ -412,13 +388,13 @@ class BufferManager //! void copyOutputToHost() { - memcpyBuffers(false, true, false, 0); + memcpyBuffers(false, true, false); } //! //! \brief Copy the contents of input host buffers to input device buffers asynchronously. //! - void copyInputToDeviceAsync(const cudaStream_t& stream) + void copyInputToDeviceAsync(cudaStream_t const& stream = 0) { memcpyBuffers(true, false, true, stream); } @@ -426,7 +402,7 @@ class BufferManager //! //! \brief Copy the contents of output device buffers to output host buffers asynchronously. //! 
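//! Note: the asynchronous variants only enqueue the copies on the given stream; host buffers
//! are safe to read only after the stream has been synchronized, e.g. via
//! CHECK(cudaStreamSynchronize(stream)).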
- void copyOutputToHostAsync(const cudaStream_t& stream) + void copyOutputToHostAsync(cudaStream_t const& stream = 0) { memcpyBuffers(false, true, true, stream); } @@ -434,30 +410,31 @@ class BufferManager ~BufferManager() = default; private: - void* getBuffer(const bool isHost, const std::string& tensorName) const + void* getBuffer(bool const isHost, std::string const& tensorName) const { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) + auto record = mNames.find(tensorName); + if (record == mNames.end()) return nullptr; - return (isHost ? mManagedBuffers[index]->hostBuffer.data() : mManagedBuffers[index]->deviceBuffer.data()); + return (isHost ? mManagedBuffers[record->second]->hostBuffer.data() + : mManagedBuffers[record->second]->deviceBuffer.data()); } - void* getBuffer(const bool isHost, int bindingIndex) const + bool tenosrIsInput(const std::string& tensorName) const { - if (bindingIndex == -1) - return nullptr; - return (isHost ? mManagedBuffers[bindingIndex]->hostBuffer.data() : mManagedBuffers[bindingIndex]->deviceBuffer.data()); + return mEngine->getTensorIOMode(tensorName.c_str()) == nvinfer1::TensorIOMode::kINPUT; } - void memcpyBuffers(const bool copyInput, const bool deviceToHost, const bool async, const cudaStream_t& stream) + void memcpyBuffers(bool const copyInput, bool const deviceToHost, bool const async, cudaStream_t const& stream = 0) { - for (int i = 0; i < mEngine->getNbBindings(); i++) + for (auto const& n : mNames) { - void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data() : mManagedBuffers[i]->deviceBuffer.data(); - const void* srcPtr = deviceToHost ? mManagedBuffers[i]->deviceBuffer.data() : mManagedBuffers[i]->hostBuffer.data(); - const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes(); + void* dstPtr = deviceToHost ? mManagedBuffers[n.second]->hostBuffer.data() + : mManagedBuffers[n.second]->deviceBuffer.data(); + void const* srcPtr = deviceToHost ? mManagedBuffers[n.second]->deviceBuffer.data() + : mManagedBuffers[n.second]->hostBuffer.data(); + size_t const byteSize = mManagedBuffers[n.second]->hostBuffer.nbBytes(); const cudaMemcpyKind memcpyType = deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice; - if ((copyInput && mEngine->bindingIsInput(i)) || (!copyInput && !mEngine->bindingIsInput(i))) + if ((copyInput && tenosrIsInput(n.first)) || (!copyInput && !tenosrIsInput(n.first))) { if (async) CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream)); @@ -468,9 +445,10 @@ class BufferManager } std::shared_ptr mEngine; //!< The pointer to the engine - int mBatchSize = 0; //!< The batch size for legacy networks, 0 otherwise. + int mBatchSize; //!< The batch size for legacy networks, 0 otherwise. std::vector> mManagedBuffers; //!< The vector of pointers to managed buffers - std::vector mDeviceBindings; //!< The vector of device buffers needed for engine execution + std::vector mDeviceBindings; //!< The vector of device buffers needed for engine execution + std::unordered_map mNames; //!< The map of tensor name and index pairs }; } // namespace samplesCommon diff --git a/src/Detector/tensorrt_yolo/common/common.h b/src/Detector/tensorrt_yolo/common/common.h index 2270a2cd..538c6094 100644 --- a/src/Detector/tensorrt_yolo/common/common.h +++ b/src/Detector/tensorrt_yolo/common/common.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -16,22 +17,13 @@ #ifndef TENSORRT_COMMON_H #define TENSORRT_COMMON_H - -// For loadLibrary -#ifdef _MSC_VER -// Needed so that the max/min definitions in windows.h do not conflict with std::max/min. -#ifndef NOMINMAX -#define NOMINMAX -#endif -#include -#undef NOMINMAX -#else -#include -#endif - #include "NvInfer.h" +#if !TRT_WINML #include "NvInferPlugin.h" +#endif #include "logger.h" +#include "safeCommon.h" +#include "timingCache.h" #include #include #include @@ -39,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +45,15 @@ #include #include -#include "safeCommon.h" +#ifdef _MSC_VER +// For loadLibrary +// Needed so that the max/min definitions in windows.h do not conflict with std::max/min. +#define NOMINMAX +#include +#undef NOMINMAX +#else +#include +#endif #ifdef _MSC_VER #define FN_NAME __FUNCTION__ @@ -82,7 +83,7 @@ if (!(condition)) \ { \ sample::gLogError << "Assertion failure: " << #condition << std::endl; \ - abort(); \ + exit(EXIT_FAILURE); \ } \ } while (0) @@ -96,7 +97,7 @@ OBJ_GUARD(T) makeObjGuard(T_* t) { CHECK(!(std::is_base_of::value || std::is_same::value)); - auto deleter = [](T* t) { t->destroy(); }; + auto deleter = [](T* t) { delete t; }; return std::unique_ptr{static_cast(t), deleter}; } @@ -113,21 +114,6 @@ constexpr long double operator"" _KiB(long double val) return val * (1 << 10); } -// These is necessary if we want to be able to write 1_GiB instead of 1.0_GiB. -// Since the return type is signed, -1_GiB will work as expected. -constexpr long long int operator"" _GiB(unsigned long long val) -{ - return val * (1 << 30); -} -constexpr long long int operator"" _MiB(unsigned long long val) -{ - return val * (1 << 20); -} -constexpr long long int operator"" _KiB(unsigned long long val) -{ - return val * (1 << 10); -} - struct SimpleProfiler : public nvinfer1::IProfiler { struct Record @@ -136,7 +122,7 @@ struct SimpleProfiler : public nvinfer1::IProfiler int count{0}; }; - virtual void reportLayerTime(const char* layerName, float ms) noexcept + void reportLayerTime(const char* layerName, float ms) noexcept override { mProfile[layerName].count++; mProfile[layerName].time += ms; @@ -183,7 +169,7 @@ struct SimpleProfiler : public nvinfer1::IProfiler auto old_precision = out.precision(); // Output header { - out << std::setw(maxLayerNameLength) << layerNameStr << " "; + out << std::setfill(' ') << std::setw(maxLayerNameLength) << layerNameStr << " "; out << std::setw(12) << "Runtime, " << "%" << " "; @@ -214,80 +200,12 @@ struct SimpleProfiler : public nvinfer1::IProfiler std::map mProfile; }; -//! Locate path to file, given its filename or filepath suffix and possible dirs it might lie in. -//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a file path. 
-inline std::string locateFile( - const std::string& filepathSuffix, const std::vector& directories, bool reportError = true) -{ - const int MAX_DEPTH{10}; - bool found{false}; - std::string filepath; - - for (auto& dir : directories) - { - if (!dir.empty() && dir.back() != '/') - { -#ifdef _MSC_VER - filepath = dir + "\\" + filepathSuffix; -#else - filepath = dir + "/" + filepathSuffix; -#endif - } - else - { - filepath = dir + filepathSuffix; - } - - for (int i = 0; i < MAX_DEPTH && !found; i++) - { - const std::ifstream checkFile(filepath); - found = checkFile.is_open(); - if (found) - { - break; - } - - filepath = "../" + filepath; // Try again in parent dir - } - - if (found) - { - break; - } - - filepath.clear(); - } - - // Could not find the file - if (filepath.empty()) - { - const std::string dirList = std::accumulate(directories.begin() + 1, directories.end(), directories.front(), - [](const std::string& a, const std::string& b) { return a + "\n\t" + b; }); - std::cout << "Could not find " << filepathSuffix << " in data directories:\n\t" << dirList << std::endl; - - if (reportError) - { - std::cout << "&&&& FAILED" << std::endl; - exit(EXIT_FAILURE); - } - } - - return filepath; -} - -inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, int inW) -{ - std::ifstream infile(fileName, std::ifstream::binary); - assert(infile.is_open() && "Attempting to read from a file that is not open."); - std::string magic, h, w, max; - infile >> magic >> h >> w >> max; - infile.seekg(1, infile.cur); - infile.read(reinterpret_cast(buffer), inH * inW); -} - namespace samplesCommon { - +using nvinfer1::utils::loadTimingCacheFile; +using nvinfer1::utils::buildTimingCacheFromFile; +using nvinfer1::utils::saveTimingCacheFile; +using nvinfer1::utils::updateTimingCacheFile; // Swaps endianness of an integral type. template ::value, int>::type = 0> inline T swapEndianness(const T& value) @@ -339,7 +257,7 @@ class TypedHostMemory : public HostMemory { mData = new ElemType[size]; }; - ~TypedHostMemory() noexcept + ~TypedHostMemory() noexcept override { delete[](ElemType*) mData; } @@ -360,7 +278,7 @@ inline void* safeCudaMalloc(size_t memSize) if (deviceMem == nullptr) { std::cerr << "Out of memory" << std::endl; - exit(1); + exit(EXIT_FAILURE); } return deviceMem; } @@ -375,25 +293,20 @@ struct InferDeleter template void operator()(T* obj) const { -#if (NV_TENSORRT_MAJOR < 8) - obj->destroy(); -#else delete obj; -#endif } }; template -using SampleUniquePtr = std::unique_ptr; +using SampleUniquePtr = std::unique_ptr; -static auto StreamDeleter = [](cudaStream_t* pStream) +static auto StreamDeleter = [](cudaStream_t* pStream) { + if (pStream) { - if (pStream) - { - cudaStreamDestroy(*pStream); - delete pStream; - } - }; + static_cast(cudaStreamDestroy(*pStream)); + delete pStream; + } +}; inline std::unique_ptr makeCudaStream() { @@ -531,7 +444,7 @@ inline float getMaxValue(const float* buffer, int64_t size) // // The default parameter values choosen arbitrarily. Range values should be choosen such that // we avoid underflow or overflow. Also range value should be non zero to avoid uniform zero scale tensor. -inline void setAllDynamicRanges(nvinfer1::INetworkDefinition* network, float inRange = 2.0f, float outRange = 4.0f) +inline void setAllDynamicRanges(nvinfer1::INetworkDefinition* network, float inRange = 2.0F, float outRange = 4.0F) { // Ensure that all layer inputs have a scale. 
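    // Clarifying note: setDynamicRange(-r, r) declares that a tensor's values lie in the
    // symmetric interval [-r, r]; for INT8 this corresponds to a quantization scale of
    // roughly r / 127. The loop below assigns inRange to layer inputs and outRange to layer
    // outputs that do not already carry ranges, so it is suitable for functional testing only.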
for (int i = 0; i < network->getNbLayers(); i++) @@ -579,14 +492,15 @@ inline void setDummyInt8DynamicRanges(const nvinfer1::IBuilderConfig* c, nvinfer // Set dummy per-tensor dynamic range if Int8 mode is requested. if (c->getFlag(nvinfer1::BuilderFlag::kINT8)) { - sample::gLogWarning - << "Int8 calibrator not provided. Generating dummy per-tensor dynamic range. Int8 accuracy is not guaranteed." - << std::endl; + sample::gLogWarning << "Int8 calibrator not provided. Generating dummy per-tensor dynamic range. Int8 accuracy " + "is not guaranteed." + << std::endl; setAllDynamicRanges(n); } } -inline void enableDLA(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, int useDLACore, bool allowGPUFallback = true) +inline void enableDLA( + nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, int useDLACore, bool allowGPUFallback = true) { if (useDLACore >= 0) { @@ -627,18 +541,28 @@ inline uint32_t getElementSize(nvinfer1::DataType t) noexcept { switch (t) { - case nvinfer1::DataType::kINT32: return 4; + case nvinfer1::DataType::kINT64: return 8; + case nvinfer1::DataType::kINT32: case nvinfer1::DataType::kFLOAT: return 4; + case nvinfer1::DataType::kBF16: case nvinfer1::DataType::kHALF: return 2; case nvinfer1::DataType::kBOOL: - case nvinfer1::DataType::kINT8: return 1; + case nvinfer1::DataType::kUINT8: + case nvinfer1::DataType::kINT8: + case nvinfer1::DataType::kFP8: return 1; + case nvinfer1::DataType::kINT4: + ASSERT(false && "Element size is not implemented for sub-byte data-types"); } return 0; } -inline int64_t volume(const nvinfer1::Dims& d) +inline int64_t volume(nvinfer1::Dims const& dims, int32_t start, int32_t stop) { - return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + ASSERT(start >= 0); + ASSERT(start <= stop); + ASSERT(stop <= dims.nbDims); + ASSERT(std::all_of(dims.d + start, dims.d + stop, [](int32_t x) { return x >= 0; })); + return std::accumulate(dims.d + start, dims.d + stop, int64_t{1}, std::multiplies{}); } template @@ -698,7 +622,7 @@ void writePPMFileWithBBox(const std::string& filename, PPM& ppm, const << ppm.w << " " << ppm.h << "\n" << ppm.max << "\n"; - auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; + auto round = [](float x) -> int { return int(std::floor(x + 0.5F)); }; const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1); const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1); const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1); @@ -739,7 +663,7 @@ inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, std::vec << "\n" << ppm.w << " " << ppm.h << "\n" << ppm.max << "\n"; - auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; + auto round = [](float x) -> int { return int(std::floor(x + 0.5F)); }; for (auto bbox : dets) { @@ -778,7 +702,7 @@ class TimerBase virtual void stop() {} float microseconds() const noexcept { - return mMs * 1000.f; + return mMs * 1000.F; } float milliseconds() const noexcept { @@ -786,15 +710,15 @@ class TimerBase } float seconds() const noexcept { - return mMs / 1000.f; + return mMs / 1000.F; } void reset() noexcept { - mMs = 0.f; + mMs = 0.F; } protected: - float mMs{0.0f}; + float mMs{0.0F}; }; class GpuTimer : public TimerBase @@ -811,14 +735,14 @@ class GpuTimer : public TimerBase CHECK(cudaEventDestroy(mStart)); CHECK(cudaEventDestroy(mStop)); } - void start() + void start() override { CHECK(cudaEventRecord(mStart, mStream)); } - void stop() + void stop() override { CHECK(cudaEventRecord(mStop, 
mStream)); - float ms{0.0f}; + float ms{0.0F}; CHECK(cudaEventSynchronize(mStop)); CHECK(cudaEventElapsedTime(&ms, mStart, mStop)); mMs += ms; @@ -835,11 +759,11 @@ class CpuTimer : public TimerBase public: using clock_type = Clock; - void start() + void start() override { mStart = Clock::now(); } - void stop() + void stop() override { mStop = Clock::now(); mMs += std::chrono::duration{mStop - mStart}.count(); @@ -865,13 +789,7 @@ inline std::vector splitString(std::string str, char delimiter = ', return splitVect; } -// Return m rounded up to nearest multiple of n -inline int roundUp(int m, int n) -{ - return ((m + n - 1) / n) * n; -} - -inline int getC(const nvinfer1::Dims& d) +inline int getC(nvinfer1::Dims const& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; } @@ -886,54 +804,111 @@ inline int getW(const nvinfer1::Dims& d) return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; } -inline void loadLibrary(const std::string& path) +//! Platform-agnostic wrapper around dynamic libraries. +class DynamicLibrary { -#ifdef _MSC_VER - void* handle = LoadLibrary(path.c_str()); -#else - int32_t flags{RTLD_LAZY}; +public: + explicit DynamicLibrary(std::string const& name) + : mLibName{name} + { +#if defined(_WIN32) + mHandle = LoadLibraryA(name.c_str()); +#else // defined(_WIN32) + int32_t flags{RTLD_LAZY}; #if ENABLE_ASAN - // https://github.com/google/sanitizers/issues/89 - // asan doesn't handle module unloading correctly and there are no plans on doing - // so. In order to get proper stack traces, don't delete the shared library on - // close so that asan can resolve the symbols correctly. - flags |= RTLD_NODELETE; + // https://github.com/google/sanitizers/issues/89 + // asan doesn't handle module unloading correctly and there are no plans on doing + // so. In order to get proper stack traces, don't delete the shared library on + // close so that asan can resolve the symbols correctly. + flags |= RTLD_NODELETE; #endif // ENABLE_ASAN - void* handle = dlopen(path.c_str(), flags); + mHandle = dlopen(name.c_str(), flags); +#endif // defined(_WIN32) + + if (mHandle == nullptr) + { + std::string errorStr{}; +#if !defined(_WIN32) + errorStr = std::string{" due to "} + std::string{dlerror()}; #endif - if (handle == nullptr) + throw std::runtime_error("Unable to open library: " + name + errorStr); + } + } + + DynamicLibrary(DynamicLibrary const&) = delete; + DynamicLibrary(DynamicLibrary const&&) = delete; + + //! + //! Retrieve a function symbol from the loaded library. + //! + //! \return the loaded symbol on success + //! \throw std::invalid_argument if loading the symbol failed. + //! 
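//! Illustrative use (the library and symbol names below are placeholders):
//!
//!   samplesCommon::DynamicLibrary pluginLib{"libcustom_plugins.so"};
//!   auto registerFn = pluginLib.symbolAddress<bool(char const*)>("registerCustomPlugins");
//!   bool const registered = registerFn("my_plugin_namespace");
//!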
+ template + std::function symbolAddress(char const* name) { -#ifdef _MSC_VER - sample::gLogError << "Could not load plugin library: " << path << std::endl; + if (mHandle == nullptr) + { + throw std::runtime_error("Handle to library is nullptr."); + } + void* ret; +#if defined(_MSC_VER) + ret = static_cast(GetProcAddress(static_cast(mHandle), name)); #else - sample::gLogError << "Could not load plugin library: " << path << ", due to: " << dlerror() << std::endl; + ret = dlsym(mHandle, name); #endif + if (ret == nullptr) + { + std::string const kERROR_MSG(mLibName + ": error loading symbol: " + std::string(name)); + throw std::invalid_argument(kERROR_MSG); + } + return reinterpret_cast(ret); } -} -inline int32_t getSMVersion() -{ - int32_t deviceIndex = 0; - CHECK(cudaGetDevice(&deviceIndex)); + ~DynamicLibrary() + { + try + { +#if defined(_WIN32) + ASSERT(static_cast(FreeLibrary(static_cast(mHandle)))); +#else + ASSERT(dlclose(mHandle) == 0); +#endif + } + catch (...) + { + sample::gLogError << "Unable to close library: " << mLibName << std::endl; + } + } - int32_t major, minor; - CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); - CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); +private: + std::string mLibName{}; //!< Name of the DynamicLibrary + void* mHandle{}; //!< Handle to the DynamicLibrary +}; - return ((major << 8) | minor); +inline std::unique_ptr loadLibrary(std::string const& path) +{ + // make_unique not available until C++14 - we still need to support C++11 builds. + return std::unique_ptr(new DynamicLibrary{path}); } -inline bool isSMSafe() +inline int32_t getMaxPersistentCacheSize() { - const int32_t smVersion = getSMVersion(); - return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 || - smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807; + int32_t deviceIndex{}; + CHECK(cudaGetDevice(&deviceIndex)); + + int32_t maxPersistentL2CacheSize{}; +#if CUDART_VERSION >= 11030 && !TRT_WINML + CHECK(cudaDeviceGetAttribute(&maxPersistentL2CacheSize, cudaDevAttrMaxPersistingL2CacheSize, deviceIndex)); +#endif + + return maxPersistentL2CacheSize; } inline bool isDataTypeSupported(nvinfer1::DataType dataType) { - auto builder = SampleUniquePtr(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); + auto builder = SampleUniquePtr(createBuilder()); if (!builder) { return false; @@ -947,7 +922,6 @@ inline bool isDataTypeSupported(nvinfer1::DataType dataType) return true; } - } // namespace samplesCommon inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) diff --git a/src/Detector/tensorrt_yolo/common/dumpTFWts.py b/src/Detector/tensorrt_yolo/common/dumpTFWts.py new file mode 100644 index 00000000..70770fbd --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/dumpTFWts.py @@ -0,0 +1,124 @@ +#!/usr/bin/python +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Script to dump TensorFlow weights in TRT v1 and v2 dump format. +# The V1 format is for TensorRT 4.0. The V2 format is for TensorRT 4.0 and later. + +import sys +import struct +import argparse + +try: + import tensorflow as tf + from tensorflow.python import pywrap_tensorflow +except ImportError as err: + sys.stderr.write("""Error: Failed to import module ({})""".format(err)) + sys.exit() + +parser = argparse.ArgumentParser(description="TensorFlow Weight Dumper") + +parser.add_argument( + "-m", + "--model", + required=True, + help="The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908", +) +parser.add_argument("-o", "--output", required=True, help="The weight file to dump all the weights to.") +parser.add_argument("-1", "--wtsv1", required=False, default=False, type=bool, help="Dump the weights in the wts v1.") + +opt = parser.parse_args() + +if opt.wtsv1: + print("Outputting the trained weights in TensorRT's wts v1 format. This format is documented as:") + print("Line 0: ") + print("Line 1-Num: [buffer name] [buffer type] [buffer size] ") +else: + print("Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:") + print("Line 0: ") + print("Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. (1, 2, 3)}] ") + +inputbase = opt.model +outputbase = opt.output + + +def float_to_hex(f): + return hex(struct.unpack(" +#include +#include + +namespace nvinfer1 +{ +namespace utils +{ +FileLock::FileLock(ILogger& logger, std::string const& fileName) + : mLogger(logger) + , mFileName(fileName) +{ + std::string lockFileName = mFileName + ".lock"; +#ifdef _MSC_VER + { + std::stringstream ss; + ss << "Trying to set exclusive file lock " << lockFileName << std::endl; + mLogger.log(ILogger::Severity::kVERBOSE, ss.str().c_str()); + } + // MS docs said this is a blocking IO if "FILE_FLAG_OVERLAPPED" is not provided + mHandle = CreateFileA(lockFileName.c_str(), GENERIC_WRITE, 0, NULL, OPEN_ALWAYS, 0, NULL); + if (mHandle == INVALID_HANDLE_VALUE) + { + throw std::runtime_error("Failed to lock " + lockFileName + "!"); + } +#elif defined(__QNX__) + // We once enabled the file lock on QNX, lockf(F_TLOCK) return -1 and the reported error is + // The error generated was 89, which means that the function is not implemented. 
+#else + mHandle = fopen(lockFileName.c_str(), "wb+"); + if (mHandle == nullptr) + { + throw std::runtime_error("Cannot open " + lockFileName + "!"); + } + { + std::stringstream ss; + ss << "Trying to set exclusive file lock " << lockFileName << std::endl; + mLogger.log(ILogger::Severity::kVERBOSE, ss.str().c_str()); + } + mDescriptor = fileno(mHandle); + auto ret = lockf(mDescriptor, F_LOCK, 0); + if (ret != 0) + { + mDescriptor = -1; + fclose(mHandle); + throw std::runtime_error("Failed to lock " + lockFileName + "!"); + } +#endif +} + +FileLock::~FileLock() +{ + std::string lockFileName = mFileName + ".lock"; +#ifdef _MSC_VER + if (mHandle != INVALID_HANDLE_VALUE) + { + CloseHandle(mHandle); + } +#elif defined(__QNX__) + // We once enabled the file lock on QNX, lockf(F_TLOCK) return -1 and the reported error is + // The error generated was 89 + // That means : Function not implemented +#else + if (mDescriptor != -1) + { + auto ret = lockf(mDescriptor, F_ULOCK, 0); + if (mHandle != nullptr) + { + fclose(mHandle); + } + if (ret != 0) + { + std::stringstream ss; + ss << "Failed to unlock " << lockFileName << ", please remove " << lockFileName << ".lock manually!" + << std::endl; + mLogger.log(ILogger::Severity::kVERBOSE, ss.str().c_str()); + } + } +#endif +} +} // namespace utils +} // namespace nvinfer1 diff --git a/src/Detector/tensorrt_yolo/common/fileLock.h b/src/Detector/tensorrt_yolo/common/fileLock.h new file mode 100644 index 00000000..d0f64a5b --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/fileLock.h @@ -0,0 +1,86 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TENSORRT_SAMPLES_COMMON_FILELOCK_H_ +#define TENSORRT_SAMPLES_COMMON_FILELOCK_H_ +#include "NvInfer.h" +#ifdef _MSC_VER +// Needed so that the max/min definitions in windows.h do not conflict with std::max/min. +#define NOMINMAX +#include +#undef NOMINMAX +#else +#include // fileno +#include // lockf +#endif +#include + +namespace nvinfer1 +{ +namespace utils +{ +//! +//! \brief RAII object that locks a the specified file. +//! +//! The FileLock class uses a lock file to specify that the +//! current file is being used by a TensorRT tool or sample +//! so that things like the TimingCache can be updated across +//! processes without having conflicts. +//! +class FileLock +{ +public: + FileLock(nvinfer1::ILogger& logger, std::string const& fileName); + ~FileLock(); + FileLock() = delete; // no default ctor + FileLock(FileLock const&) = delete; // no copy ctor + FileLock& operator=(FileLock const&) = delete; // no copy assignment + FileLock(FileLock&&) = delete; // no move ctor + FileLock& operator=(FileLock&&) = delete; // no move assignment + +private: + //! + //! The logger that emits any error messages that might show up. + //! + nvinfer1::ILogger& mLogger; + + //! + //! The filename that the FileLock is protecting from multiple + //! 
TensorRT processes from writing to. + //! + std::string const mFileName; + +#ifdef _MSC_VER + //! + //! The file handle on windows for the file lock. + //! + HANDLE mHandle{}; +#else + //! + //! The file handle on linux for the file lock. + //! + FILE* mHandle{}; + //! + //! The file descriptor on linux of the file lock. + //! + int32_t mDescriptor{-1}; +#endif +}; // class FileLock +} // namespace utils +} // namespace nvinfer1 + +#endif // TENSORRT_SAMPLES_COMMON_FILELOCK_H_ diff --git a/src/Detector/tensorrt_yolo/common/getOptions.cpp b/src/Detector/tensorrt_yolo/common/getOptions.cpp new file mode 100644 index 00000000..19cd3281 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/getOptions.cpp @@ -0,0 +1,248 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "getOptions.h" +#include "logger.h" + +#include +#include +#include +#include +#include + +namespace nvinfer1 +{ +namespace utility +{ + +//! Matching for TRTOptions is defined as follows: +//! +//! If A and B both have longName set, A matches B if and only if A.longName == +//! B.longName and (A.shortName == B.shortName if both have short name set). +//! +//! If A only has shortName set and B only has longName set, then A does not +//! match B. It is assumed that when 2 TRTOptions are compared, one of them is +//! the definition of a TRTOption in the input to getOptions. As such, if the +//! definition only has shortName set, it will never be equal to a TRTOption +//! that does not have shortName set (and same for longName). +//! +//! If A and B both have shortName set but B does not have longName set, A +//! matches B if and only if A.shortName == B.shortName. +//! +//! If A has neither long or short name set, A matches B if and only if B has +//! neither long or short name set. +bool matches(const TRTOption& a, const TRTOption& b) +{ + if (!a.longName.empty() && !b.longName.empty()) + { + if (a.shortName && b.shortName) + { + return (a.longName == b.longName) && (a.shortName == b.shortName); + } + return a.longName == b.longName; + } + + // If only one of them is not set, this will return false anyway. + return a.shortName == b.shortName; +} + +//! getTRTOptionIndex returns the index of a TRTOption in a vector of +//! TRTOptions, -1 if not found. +int getTRTOptionIndex(const std::vector& options, const TRTOption& opt) +{ + for (size_t i = 0; i < options.size(); ++i) + { + if (matches(opt, options[i])) + { + return i; + } + } + return -1; +} + +//! validateTRTOption will return a string containing an error message if options +//! contain non-numeric characters, or if there are duplicate option names found. +//! Otherwise, returns the empty string. 
+std::string validateTRTOption( + const std::set& seenShortNames, const std::set& seenLongNames, const TRTOption& opt) +{ + if (opt.shortName != 0) + { + if (!std::isalnum(opt.shortName)) + { + return "Short name '" + std::to_string(opt.shortName) + "' is non-alphanumeric"; + } + + if (seenShortNames.find(opt.shortName) != seenShortNames.end()) + { + return "Short name '" + std::to_string(opt.shortName) + "' is a duplicate"; + } + } + + if (!opt.longName.empty()) + { + for (const char& c : opt.longName) + { + if (!std::isalnum(c) && c != '-' && c != '_') + { + return "Long name '" + opt.longName + "' contains characters that are not '-', '_', or alphanumeric"; + } + } + + if (seenLongNames.find(opt.longName) != seenLongNames.end()) + { + return "Long name '" + opt.longName + "' is a duplicate"; + } + } + return ""; +} + +//! validateTRTOptions will return a string containing an error message if any +//! options contain non-numeric characters, or if there are duplicate option +//! names found. Otherwise, returns the empty string. +std::string validateTRTOptions(const std::vector& options) +{ + std::set seenShortNames; + std::set seenLongNames; + for (size_t i = 0; i < options.size(); ++i) + { + const std::string errMsg = validateTRTOption(seenShortNames, seenLongNames, options[i]); + if (!errMsg.empty()) + { + return "Error '" + errMsg + "' at TRTOption " + std::to_string(i); + } + + seenShortNames.insert(options[i].shortName); + seenLongNames.insert(options[i].longName); + } + return ""; +} + +//! parseArgs parses an argument list and returns a TRTParsedArgs with the +//! fields set accordingly. Assumes that options is validated. +//! ErrMsg will be set if: +//! - an argument is null +//! - an argument is empty +//! - an argument does not have option (i.e. "-" and "--") +//! - a short argument has more than 1 character +//! - the last argument in the list requires a value +TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector& options) +{ + TRTParsedArgs parsedArgs; + parsedArgs.values.resize(options.size()); + + for (int i = 1; i < argc; ++i) // index of current command-line argument + { + if (argv[i] == nullptr) + { + return TRTParsedArgs{"Null argument at index " + std::to_string(i)}; + } + + const std::string argStr(argv[i]); + if (argStr.empty()) + { + return TRTParsedArgs{"Empty argument at index " + std::to_string(i)}; + } + + // No starting hyphen means it is a positional argument + if (argStr[0] != '-') + { + parsedArgs.positionalArgs.push_back(argStr); + continue; + } + + if (argStr == "-" || argStr == "--") + { + return TRTParsedArgs{"Argument does not specify an option at index " + std::to_string(i)}; + } + + // If only 1 hyphen, char after is the flag. 
+ TRTOption opt{' ', "", false, ""}; + std::string value; + if (argStr[1] != '-') + { + // Must only have 1 char after the hyphen + if (argStr.size() > 2) + { + return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i)}; + } + opt.shortName = argStr[1]; + } + else + { + opt.longName = argStr.substr(2); + + // We need to support --foo=bar syntax, so look for '=' + const size_t eqIndex = opt.longName.find('='); + if (eqIndex < opt.longName.size()) + { + value = opt.longName.substr(eqIndex + 1); + opt.longName = opt.longName.substr(0, eqIndex); + } + } + + const int idx = getTRTOptionIndex(options, opt); + if (idx < 0) + { + continue; + } + + if (options[idx].valueRequired) + { + if (!value.empty()) + { + parsedArgs.values[idx].second.push_back(value); + parsedArgs.values[idx].first = parsedArgs.values[idx].second.size(); + continue; + } + + if (i + 1 >= argc) + { + return TRTParsedArgs{"Last argument requires value, but none given"}; + } + + const std::string nextArg(argv[i + 1]); + if (nextArg.size() >= 1 && nextArg[0] == '-') + { + sample::gLogWarning << "Warning: Using '" << nextArg << "' as a value for '" << argStr + << "', Should this be its own flag?" << std::endl; + } + + parsedArgs.values[idx].second.push_back(nextArg); + i += 1; // Next argument already consumed + + parsedArgs.values[idx].first = parsedArgs.values[idx].second.size(); + } + else + { + parsedArgs.values[idx].first += 1; + } + } + return parsedArgs; +} + +TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector& options) +{ + const std::string errMsg = validateTRTOptions(options); + if (!errMsg.empty()) + { + return TRTParsedArgs{errMsg}; + } + return parseArgs(argc, argv, options); +} +} // namespace utility +} // namespace nvinfer1 diff --git a/src/Detector/tensorrt_yolo/common/getOptions.h b/src/Detector/tensorrt_yolo/common/getOptions.h new file mode 100644 index 00000000..4bbf9e27 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/getOptions.h @@ -0,0 +1,128 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_GET_OPTIONS_H +#define TRT_GET_OPTIONS_H + +#include +#include +#include + +namespace nvinfer1 +{ +namespace utility +{ + +//! TRTOption defines a command line option. At least 1 of shortName and longName +//! must be defined. +//! If bool initialization is undefined behavior on your system, valueRequired +//! must also be explicitly defined. +//! helpText is optional. +struct TRTOption +{ + char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b) + std::string longName; //!< Option name in long (double hyphen) form (i.e. --foo, --bar) + bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4, --foo bar) + std::string helpText; //!< Text to show when printing out the command usage +}; + +//! 
TRTParsedArgs is returned by getOptions after it has parsed a command line +//! argument list (argv). +//! +//! errMsg is a string containing an error message if any errors occurred. If it +//! is empty, no errors occurred. +//! +//! values stores a vector of pairs for each option (ordered by order in the +//! input). Each pair contains an int (the number of occurrences) and a vector +//! of strings (a list of values). The user should know which of these to use, +//! and which options required values. For non-value options, only occurrences is +//! populated. For value-required options, occurrences == # of values. Values do +//! not need to be unique. +//! +//! positionalArgs stores additional arguments that are passed in without an +//! option (these must not start with a hyphen). +struct TRTParsedArgs +{ + std::string errMsg; + std::vector>> values; + std::vector positionalArgs; +}; + +//! Parse the input arguments passed to main() and extract options as well as +//! positional arguments. +//! +//! Options are supposed to be passed to main() with a preceding hyphen '-'. +//! +//! If there is a single preceding hyphen, there should be exactly 1 character +//! after the hyphen, which is interpreted as the option. +//! +//! If there are 2 preceding hyphens, the entire argument (without the hyphens) +//! is interpreted as the option. +//! +//! If the option requires a value, the next argument is used as the value. +//! +//! Positional arguments must not start with a hyphen. +//! +//! If an argument requires a value, the next argument is interpreted as the +//! value, even if it is the form of a valid option (i.e. --foo --bar will store +//! "--bar" as a value for option "foo" if "foo" requires a value). +//! We also support --name=value syntax. In this case, 'value' would be used as +//! the value, NOT the next argument. +//! +//! For options: +//! { { 'a', "", false }, +//! { 'b', "", false }, +//! { 0, "cee", false }, +//! { 'd', "", true }, +//! { 'e', "", true }, +//! { 'f', "foo", true } } +//! +//! ./main hello world -a -a --cee -d 12 -f 34 +//! and +//! ./main hello world -a -a --cee -d 12 --foo 34 +//! +//! will result in: +//! +//! TRTParsedArgs { +//! errMsg: "", +//! values: { { 2, {} }, +//! { 0, {} }, +//! { 1, {} }, +//! { 1, {"12"} }, +//! { 0, {} }, +//! { 1, {"34"} } } +//! positionalArgs: {"hello", "world"}, +//! } +//! +//! Non-POSIX behavior: +//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each +//! option must have its own hyphen prefix. +//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be +//! whitespace-separated from the option it is for. +//! +//! @param[in] argc The number of arguments passed to main (including the +//! file name, which is disregarded) +//! @param[in] argv The arguments passed to main (including the file name, +//! which is disregarded) +//! @param[in] options List of TRTOptions to parse +//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of +//! the fields. 
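//!
//! A sketch of a call site (the option set below is made up for illustration):
//!
//!   std::vector<nvinfer1::utility::TRTOption> opts{
//!       {'b', "batch", true, "batch size"},
//!       {0, "verbose", false, "print verbose output"}};
//!   nvinfer1::utility::TRTParsedArgs parsed = nvinfer1::utility::getOptions(argc, argv, opts);
//!   if (!parsed.errMsg.empty())
//!   {
//!       // report parsed.errMsg and exit
//!   }
//!   int32_t const batchCount = parsed.values[0].first;            // occurrences of -b/--batch
//!   std::string const batchValue = batchCount ? parsed.values[0].second.back() : "1";
//!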
+TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector& options); +} // namespace utility +} // namespace nvinfer1 + +#endif // TRT_GET_OPTIONS_H diff --git a/src/Detector/tensorrt_yolo/common/getopt.c b/src/Detector/tensorrt_yolo/common/getopt.c new file mode 100644 index 00000000..c1da08b5 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/getopt.c @@ -0,0 +1,568 @@ +/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */ +/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */ + +/* + * Copyright (c) 2002 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Dieter Baron and Thomas Klausner. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "getoptWin.h" +#include +#include +#include +#include +#include +#include + +#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ + +#ifdef REPLACE_GETOPT +int opterr = 1; /* if error message should be printed */ +int optind = 1; /* index into parent argv vector */ +int optopt = '?'; /* character checked for validity */ +#undef optreset /* see getopt.h */ +#define optreset __mingw_optreset +int optreset; /* reset getopt */ +char* optarg; /* argument associated with option */ +#endif + +#define PRINT_ERROR ((opterr) && (*options != ':')) + +#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */ +#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */ +#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */ + +/* return values */ +#define BADCH (int) '?' +#define BADARG ((*options == ':') ? (int) ':' : (int) '?') +#define INORDER (int) 1 + +#ifndef __CYGWIN__ +#define __progname __argv[0] +#else +extern char __declspec(dllimport) * __progname; +#endif + +#ifdef __CYGWIN__ +static char EMSG[] = ""; +#else +#define EMSG "" +#endif + +static int getopt_internal(int, char* const*, char const*, const struct option*, int*, int); +static int parse_long_options(char* const*, char const*, const struct option*, int*, int); +static int gcd(int, int); +static void permute_args(int, int, int, char* const*); + +static char* place = EMSG; /* option letter processing */ + +/* XXX: set optreset to 1 rather than these two */ +static int nonopt_start = -1; /* first non option argument (for permute) */ +static int nonopt_end = -1; /* first option after non options (for permute) */ + +/* Error messages */ +static char const recargchar[] = "option requires an argument -- %c"; +static char const recargstring[] = "option requires an argument -- %s"; +static char const ambig[] = "ambiguous option -- %.*s"; +static char const noarg[] = "option doesn't take an argument -- %.*s"; +static char const illoptchar[] = "unknown option -- %c"; +static char const illoptstring[] = "unknown option -- %s"; + +static void _vwarnx(char const* fmt, va_list ap) +{ + (void) fprintf(stderr, "%s: ", __progname); + if (fmt != NULL) + (void) vfprintf(stderr, fmt, ap); + (void) fprintf(stderr, "\n"); +} + +static void warnx(char const* fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + _vwarnx(fmt, ap); + va_end(ap); +} + +/* + * Compute the greatest common divisor of a and b. + */ +static int gcd(int a, int b) +{ + int c; + + c = a % b; + while (c != 0) + { + a = b; + b = c; + c = a % b; + } + + return (b); +} + +/* + * Exchange the block from nonopt_start to nonopt_end with the block + * from nonopt_end to opt_end (keeping the same order of arguments + * in each block). 
+ */ +static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv) +{ + int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos; + char* swap; + + /* + * compute lengths of blocks and number and size of cycles + */ + nnonopts = panonopt_end - panonopt_start; + nopts = opt_end - panonopt_end; + ncycle = gcd(nnonopts, nopts); + cyclelen = (opt_end - panonopt_start) / ncycle; + + for (i = 0; i < ncycle; i++) + { + cstart = panonopt_end + i; + pos = cstart; + for (j = 0; j < cyclelen; j++) + { + if (pos >= panonopt_end) + pos -= nnonopts; + else + pos += nopts; + swap = nargv[pos]; + /* LINTED const cast */ + ((char**) nargv)[pos] = nargv[cstart]; + /* LINTED const cast */ + ((char**) nargv)[cstart] = swap; + } + } +} + +/* + * parse_long_options -- + * Parse long options in argc/argv argument vector. + * Returns -1 if short_too is set and the option does not match long_options. + */ +static int parse_long_options( + char* const* nargv, char const* options, const struct option* long_options, int* idx, int short_too) +{ + char *current_argv, *has_equal; + size_t current_argv_len; + int i, ambiguous, match; + +#define IDENTICAL_INTERPRETATION(_x, _y) \ + (long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \ + && long_options[(_x)].val == long_options[(_y)].val) + + current_argv = place; + match = -1; + ambiguous = 0; + + optind++; + + if ((has_equal = strchr(current_argv, '=')) != NULL) + { + /* argument found (--option=arg) */ + current_argv_len = has_equal - current_argv; + has_equal++; + } + else + current_argv_len = strlen(current_argv); + + for (i = 0; long_options[i].name; i++) + { + /* find matching long option */ + if (strncmp(current_argv, long_options[i].name, current_argv_len)) + continue; + + if (strlen(long_options[i].name) == current_argv_len) + { + /* exact match */ + match = i; + ambiguous = 0; + break; + } + /* + * If this is a known short option, don't allow + * a partial match of a single character. + */ + if (short_too && current_argv_len == 1) + continue; + + if (match == -1) /* partial match */ + match = i; + else if (!IDENTICAL_INTERPRETATION(i, match)) + ambiguous = 1; + } + if (ambiguous) + { + /* ambiguous abbreviation */ + if (PRINT_ERROR) + warnx(ambig, (int) current_argv_len, current_argv); + optopt = 0; + return (BADCH); + } + if (match != -1) + { /* option found */ + if (long_options[match].has_arg == no_argument && has_equal) + { + if (PRINT_ERROR) + warnx(noarg, (int) current_argv_len, current_argv); + /* + * XXX: GNU sets optopt to val regardless of flag + */ + if (long_options[match].flag == NULL) + optopt = long_options[match].val; + else + optopt = 0; + return (BADARG); + } + if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument) + { + if (has_equal) + optarg = has_equal; + else if (long_options[match].has_arg == required_argument) + { + /* + * optional argument doesn't use next nargv + */ + optarg = nargv[optind++]; + } + } + if ((long_options[match].has_arg == required_argument) && (optarg == NULL)) + { + /* + * Missing argument; leading ':' indicates no error + * should be generated. 
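+             *
+             * For example, a long option such as "--output" that requires a value but is the
+             * last argument lands here with optarg == NULL; if the short-option string begins
+             * with ':', the caller receives ':' instead of '?' and no diagnostic is printed.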
+ */ + if (PRINT_ERROR) + warnx(recargstring, current_argv); + /* + * XXX: GNU sets optopt to val regardless of flag + */ + if (long_options[match].flag == NULL) + optopt = long_options[match].val; + else + optopt = 0; + --optind; + return (BADARG); + } + } + else + { /* unknown option */ + if (short_too) + { + --optind; + return (-1); + } + if (PRINT_ERROR) + warnx(illoptstring, current_argv); + optopt = 0; + return (BADCH); + } + if (idx) + *idx = match; + if (long_options[match].flag) + { + *long_options[match].flag = long_options[match].val; + return (0); + } + else + return (long_options[match].val); +#undef IDENTICAL_INTERPRETATION +} + +/* + * getopt_internal -- + * Parse argc/argv argument vector. Called by user level routines. + */ +static int getopt_internal( + int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx, int flags) +{ + char const* oli; /* option letter list index */ + int optchar, short_too; + static int posixly_correct = -1; + + if (options == NULL) + return (-1); + + /* + * XXX Some GNU programs (like cvs) set optind to 0 instead of + * XXX using optreset. Work around this braindamage. + */ + if (optind == 0) + optind = optreset = 1; + + /* + * Disable GNU extensions if POSIXLY_CORRECT is set or options + * string begins with a '+'. + * + * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or + * optreset != 0 for GNU compatibility. + */ + if (posixly_correct == -1 || optreset != 0) + posixly_correct = (getenv("POSIXLY_CORRECT") != NULL); + if (*options == '-') + flags |= FLAG_ALLARGS; + else if (posixly_correct || *options == '+') + flags &= ~FLAG_PERMUTE; + if (*options == '+' || *options == '-') + options++; + + optarg = NULL; + if (optreset) + nonopt_start = nonopt_end = -1; +start: + if (optreset || !*place) + { /* update scanning pointer */ + optreset = 0; + if (optind >= nargc) + { /* end of argument vector */ + place = EMSG; + if (nonopt_end != -1) + { + /* do permutation, if we have to */ + permute_args(nonopt_start, nonopt_end, optind, nargv); + optind -= nonopt_end - nonopt_start; + } + else if (nonopt_start != -1) + { + /* + * If we skipped non-options, set optind + * to the first of them. + */ + optind = nonopt_start; + } + nonopt_start = nonopt_end = -1; + return (-1); + } + if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL)) + { + place = EMSG; /* found non-option */ + if (flags & FLAG_ALLARGS) + { + /* + * GNU extension: + * return non-option as argument to option 1 + */ + optarg = nargv[optind++]; + return (INORDER); + } + if (!(flags & FLAG_PERMUTE)) + { + /* + * If no permutation wanted, stop parsing + * at first non-option. + */ + return (-1); + } + /* do permutation */ + if (nonopt_start == -1) + nonopt_start = optind; + else if (nonopt_end != -1) + { + permute_args(nonopt_start, nonopt_end, optind, nargv); + nonopt_start = optind - (nonopt_end - nonopt_start); + nonopt_end = -1; + } + optind++; + /* process next argument */ + goto start; + } + if (nonopt_start != -1 && nonopt_end == -1) + nonopt_end = optind; + + /* + * If we have "-" do nothing, if "--" we are done. + */ + if (place[1] != '\0' && *++place == '-' && place[1] == '\0') + { + optind++; + place = EMSG; + /* + * We found an option (--), so if we skipped + * non-options, we have to permute. 
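+             *
+             * For example, with "prog file1 -- -x" parsing stops at "--", and after the
+             * permutation both "file1" and "-x" are left for the caller as operands.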
+ */ + if (nonopt_end != -1) + { + permute_args(nonopt_start, nonopt_end, optind, nargv); + optind -= nonopt_end - nonopt_start; + } + nonopt_start = nonopt_end = -1; + return (-1); + } + } + + /* + * Check long options if: + * 1) we were passed some + * 2) the arg is not just "-" + * 3) either the arg starts with -- we are getopt_long_only() + */ + if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY))) + { + short_too = 0; + if (*place == '-') + place++; /* --foo long option */ + else if (*place != ':' && strchr(options, *place) != NULL) + short_too = 1; /* could be short option too */ + + optchar = parse_long_options(nargv, options, long_options, idx, short_too); + if (optchar != -1) + { + place = EMSG; + return (optchar); + } + } + + if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0') + || (oli = strchr(options, optchar)) == NULL) + { + /* + * If the user specified "-" and '-' isn't listed in + * options, return -1 (non-option) as per POSIX. + * Otherwise, it is an unknown option character (or ':'). + */ + if (optchar == (int) '-' && *place == '\0') + return (-1); + if (!*place) + ++optind; + if (PRINT_ERROR) + warnx(illoptchar, optchar); + optopt = optchar; + return (BADCH); + } + if (long_options != NULL && optchar == 'W' && oli[1] == ';') + { + /* -W long-option */ + if (*place) /* no space */ + /* NOTHING */; + else if (++optind >= nargc) + { /* no arg */ + place = EMSG; + if (PRINT_ERROR) + warnx(recargchar, optchar); + optopt = optchar; + return (BADARG); + } + else /* white space */ + place = nargv[optind]; + optchar = parse_long_options(nargv, options, long_options, idx, 0); + place = EMSG; + return (optchar); + } + if (*++oli != ':') + { /* doesn't take argument */ + if (!*place) + ++optind; + } + else + { /* takes (optional) argument */ + optarg = NULL; + if (*place) /* no white space */ + optarg = place; + else if (oli[1] != ':') + { /* arg not optional */ + if (++optind >= nargc) + { /* no arg */ + place = EMSG; + if (PRINT_ERROR) + warnx(recargchar, optchar); + optopt = optchar; + return (BADARG); + } + else + optarg = nargv[optind]; + } + place = EMSG; + ++optind; + } + /* dump back option letter */ + return (optchar); +} + +#ifdef REPLACE_GETOPT +/* + * getopt -- + * Parse argc/argv argument vector. + * + * [eventually this will replace the BSD getopt] + */ +int getopt(int nargc, char* const* nargv, char const* options) +{ + + /* + * We don't pass FLAG_PERMUTE to getopt_internal() since + * the BSD getopt(3) (unlike GNU) has never done this. + * + * Furthermore, since many privileged programs call getopt() + * before dropping privileges it makes sense to keep things + * as simple (and bug-free) as possible. + */ + return (getopt_internal(nargc, nargv, options, NULL, NULL, 0)); +} +#endif /* REPLACE_GETOPT */ + +/* + * getopt_long -- + * Parse argc/argv argument vector. + */ +int getopt_long(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx) +{ + + return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE)); +} + +/* + * getopt_long_only -- + * Parse argc/argv argument vector. 
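+ * Behaves like getopt_long(), but also matches long options introduced by a single '-'
+ * (FLAG_LONGONLY); if the word matches no long option and its first character is a known
+ * short option, it is parsed as short options instead.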
+ */ +int getopt_long_only(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx) +{ + + return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY)); +} diff --git a/src/Detector/tensorrt_yolo/common/getoptWin.h b/src/Detector/tensorrt_yolo/common/getoptWin.h new file mode 100644 index 00000000..a1dc6ffa --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/getoptWin.h @@ -0,0 +1,124 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __GETOPT_H__ +/** + * DISCLAIMER + * This file has no copyright assigned and is placed in the Public Domain. + * This file is a part of the w64 mingw-runtime package. + * + * The w64 mingw-runtime package and its code is distributed in the hope that it + * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR + * IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to + * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#define __GETOPT_H__ + +/* All the headers include this file. */ +#include + +#if defined(WINGETOPT_SHARED_LIB) +#if defined(BUILDING_WINGETOPT_DLL) +#define WINGETOPT_API __declspec(dllexport) +#else +#define WINGETOPT_API __declspec(dllimport) +#endif +#else +#define WINGETOPT_API +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + + WINGETOPT_API extern int optind; /* index of first non-option in argv */ + WINGETOPT_API extern int optopt; /* single option character, as parsed */ + WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */ + /* (user may set to zero, to suppress) */ + + WINGETOPT_API extern char* optarg; /* pointer to argument of current option */ + + extern int getopt(int nargc, char* const* nargv, char const* options); + +#ifdef _BSD_SOURCE +/* + * BSD adds the non-standard `optreset' feature, for reinitialisation + * of `getopt' parsing. We support this feature, for applications which + * proclaim their BSD heritage, before including this header; however, + * to maintain portability, developers are advised to avoid it. + */ +#define optreset __mingw_optreset + extern int optreset; +#endif +#ifdef __cplusplus +} +#endif +/* + * POSIX requires the `getopt' API to be specified in `unistd.h'; + * thus, `unistd.h' includes this header. However, we do not want + * to expose the `getopt_long' or `getopt_long_only' APIs, when + * included in this manner. Thus, close the standard __GETOPT_H__ + * declarations block, and open an additional __GETOPT_LONG_H__ + * specific block, only when *not* __UNISTD_H_SOURCED__, in which + * to declare the extended API. + */ +#endif /* !defined(__GETOPT_H__) */ + +#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) +#define __GETOPT_LONG_H__ + +#ifdef __cplusplus +extern "C" +{ +#endif + + struct option /* specification for a long form option... 
*/ + { + char const* name; /* option name, without leading hyphens */ + int has_arg; /* does it take an argument? */ + int* flag; /* where to save its status, or NULL */ + int val; /* its associated status value */ + }; + + enum /* permitted values for its `has_arg' field... */ + { + no_argument = 0, /* option never takes an argument */ + required_argument, /* option always requires an argument */ + optional_argument /* option may take an argument */ + }; + + extern int getopt_long( + int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx); + extern int getopt_long_only( + int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx); +/* + * Previous MinGW implementation had... + */ +#ifndef HAVE_DECL_GETOPT +/* + * ...for the long form API only; keep this for compatibility. + */ +#define HAVE_DECL_GETOPT 1 +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ diff --git a/src/Detector/tensorrt_yolo/common/half.h b/src/Detector/tensorrt_yolo/common/half.h index 0755c316..b997e7db 100644 --- a/src/Detector/tensorrt_yolo/common/half.h +++ b/src/Detector/tensorrt_yolo/common/half.h @@ -16,13 +16,14 @@ // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* - * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -1522,14 +1523,14 @@ class half /// \return incremented half value half& operator++() { - return *this += 1.0f; + return *this += 1.0F; } /// Prefix decrement. /// \return decremented half value half& operator--() { - return *this -= 1.0f; + return *this -= 1.0F; } /// Postfix increment. diff --git a/src/Detector/tensorrt_yolo/common/logger.cpp b/src/Detector/tensorrt_yolo/common/logger.cpp index 03c64398..909ec0bb 100644 --- a/src/Detector/tensorrt_yolo/common/logger.cpp +++ b/src/Detector/tensorrt_yolo/common/logger.cpp @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,7 +18,7 @@ #include "logger.h" #include "ErrorRecorder.h" #include "logging.h" - +using namespace nvinfer1; SampleErrorRecorder gRecorder; namespace sample { diff --git a/src/Detector/tensorrt_yolo/common/logger.h b/src/Detector/tensorrt_yolo/common/logger.h index 3069e8e9..8205e457 100644 --- a/src/Detector/tensorrt_yolo/common/logger.h +++ b/src/Detector/tensorrt_yolo/common/logger.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/src/Detector/tensorrt_yolo/common/logging.h b/src/Detector/tensorrt_yolo/common/logging.h index 78732c10..69273a5e 100644 --- a/src/Detector/tensorrt_yolo/common/logging.h +++ b/src/Detector/tensorrt_yolo/common/logging.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,7 +18,7 @@ #ifndef TENSORRT_LOGGING_H #define TENSORRT_LOGGING_H -#include "NvInferRuntimeCommon.h" +#include "NvInferRuntime.h" #include "sampleOptions.h" #include #include @@ -162,7 +163,7 @@ class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream } LogStreamConsumer(const LogStreamConsumer& other) = delete; LogStreamConsumer() = delete; - ~LogStreamConsumer() = default; + ~LogStreamConsumer() override = default; LogStreamConsumer& operator=(const LogStreamConsumer&) = delete; LogStreamConsumer& operator=(LogStreamConsumer&&) = delete; @@ -291,7 +292,7 @@ class Logger : public nvinfer1::ILogger }; //! - //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger + //! \brief Forward-compatible method for retrieving the nvinfer1::ILogger associated with this Logger //! \return The nvinfer1::ILogger associated with this Logger //! //! TODO Once all samples are updated to use this method to register the logger with TensorRT, @@ -353,7 +354,7 @@ class Logger : public nvinfer1::ILogger //! //! \brief Define a test for logging //! - //! \param[in] name The name of the test. This should be a string starting with + //! \param[in] name The name of the test. This should be a string starting with //! "TensorRT" and containing dot-separated strings containing //! 
the characters [A-Za-z0-9_]. //! For example, "TensorRT.sample_googlenet" @@ -379,7 +380,8 @@ class Logger : public nvinfer1::ILogger static TestAtom defineTest(const std::string& name, int32_t argc, char const* const* argv) { // Append TensorRT version as info - const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]"; + const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "] [b" + + std::to_string(NV_TENSORRT_BUILD) + "]"; auto cmdline = genCmdlineString(argc, argv); return defineTest(vname, cmdline); } diff --git a/src/Detector/tensorrt_yolo/common/parserOnnxConfig.h b/src/Detector/tensorrt_yolo/common/parserOnnxConfig.h index c92a1420..67ee6c71 100644 --- a/src/Detector/tensorrt_yolo/common/parserOnnxConfig.h +++ b/src/Detector/tensorrt_yolo/common/parserOnnxConfig.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -35,15 +36,13 @@ * */ -using namespace std; - class ParserOnnxConfig : public nvonnxparser::IOnnxConfig { protected: - string mModelFilename{}; - string mTextFilename{}; - string mFullTextFilename{}; + std::string mModelFilename{}; + std::string mTextFilename{}; + std::string mFullTextFilename{}; nvinfer1::DataType mModelDtype; nvonnxparser::IOnnxConfig::Verbosity mVerbosity; bool mPrintLayercInfo; @@ -62,8 +61,7 @@ class ParserOnnxConfig : public nvonnxparser::IOnnxConfig #endif } -protected: - ~ParserOnnxConfig() + ~ParserOnnxConfig() override { #ifdef ONNX_DEBUG if (isDebug()) @@ -74,62 +72,62 @@ class ParserOnnxConfig : public nvonnxparser::IOnnxConfig } public: - virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept + void setModelDtype(const nvinfer1::DataType modelDtype) noexcept override { mModelDtype = modelDtype; } - virtual nvinfer1::DataType getModelDtype() const noexcept + nvinfer1::DataType getModelDtype() const noexcept override { return mModelDtype; } - virtual const char* getModelFileName() const noexcept + const char* getModelFileName() const noexcept override { return mModelFilename.c_str(); } - virtual void setModelFileName(const char* onnxFilename) noexcept + void setModelFileName(const char* onnxFilename) noexcept override { - mModelFilename = string(onnxFilename); + mModelFilename = std::string(onnxFilename); } - virtual nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept + nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept override { return mVerbosity; } - virtual void addVerbosity() noexcept + void addVerbosity() noexcept override { ++mVerbosity; } - virtual void reduceVerbosity() noexcept + void reduceVerbosity() noexcept override { --mVerbosity; } - virtual void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept + void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept override { mVerbosity = verbosity; } - virtual const char* getTextFileName() const noexcept + const char* 
getTextFileName() const noexcept override { return mTextFilename.c_str(); } - virtual void setTextFileName(const char* textFilename) noexcept + void setTextFileName(const char* textFilename) noexcept override { - mTextFilename = string(textFilename); + mTextFilename = std::string(textFilename); } - virtual const char* getFullTextFileName() const noexcept + const char* getFullTextFileName() const noexcept override { return mFullTextFilename.c_str(); } - virtual void setFullTextFileName(const char* fullTextFilename) noexcept + void setFullTextFileName(const char* fullTextFilename) noexcept override { - mFullTextFilename = string(fullTextFilename); + mFullTextFilename = std::string(fullTextFilename); } - virtual bool getPrintLayerInfo() const noexcept + bool getPrintLayerInfo() const noexcept override { return mPrintLayercInfo; } - virtual void setPrintLayerInfo(bool src) noexcept + void setPrintLayerInfo(bool src) noexcept override { mPrintLayercInfo = src; } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() @@ -142,12 +140,6 @@ class ParserOnnxConfig : public nvonnxparser::IOnnxConfig return false; #endif } - - virtual void destroy() noexcept - { - delete this; - } - }; // class ParserOnnxConfig #endif diff --git a/src/Detector/tensorrt_yolo/common/safeCommon.h b/src/Detector/tensorrt_yolo/common/safeCommon.h index 3d84b095..f10aad18 100644 --- a/src/Detector/tensorrt_yolo/common/safeCommon.h +++ b/src/Detector/tensorrt_yolo/common/safeCommon.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,13 +18,32 @@ #ifndef TENSORRT_SAFE_COMMON_H #define TENSORRT_SAFE_COMMON_H -#include "NvInferRuntimeCommon.h" +#include "cuda_runtime.h" +#include "sampleEntrypoints.h" +#include #include +#include #include #include +#include #include #include +// For safeLoadLibrary +#ifdef _MSC_VER +// Needed so that the max/min definitions in windows.h do not conflict with std::max/min. +#define NOMINMAX +#include +#undef NOMINMAX +#else +#include +#endif +#if IS_QNX_SAFE +#include +#include +#endif // IS_QNX_SAFE + +#undef CHECK #define CHECK(status) \ do \ { \ @@ -31,10 +51,92 @@ if (ret != 0) \ { \ std::cerr << "Cuda failure: " << ret << std::endl; \ - abort(); \ + exit(EXIT_FAILURE); \ } \ } while (0) +#undef SAFE_ASSERT +#define SAFE_ASSERT(condition) \ + do \ + { \ + if (!(condition)) \ + { \ + std::cerr << "Assertion failure: " << #condition << std::endl; \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +//! Locate path to file, given its filename or filepath suffix and possible dirs it might lie in. +//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a file path. 
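+//! Example (hypothetical arguments): locateFile("coco.names", {"data/", "../data/"}) checks
+//! "data/coco.names" and "../data/coco.names", prepending "../" up to MAX_DEPTH (10) times for
+//! each candidate; on failure it prints a diagnostic and, with reportError left true, exits.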
+inline std::string locateFile( + const std::string& filepathSuffix, const std::vector& directories, bool reportError = true) +{ + const int MAX_DEPTH{10}; + bool found{false}; + std::string filepath; + + for (auto& dir : directories) + { + if (!dir.empty() && dir.back() != '/') + { +#ifdef _MSC_VER + filepath = dir + "\\" + filepathSuffix; +#else + filepath = dir + "/" + filepathSuffix; +#endif + } + else + { + filepath = dir + filepathSuffix; + } + + for (int i = 0; i < MAX_DEPTH && !found; i++) + { + const std::ifstream checkFile(filepath); + found = checkFile.is_open(); + if (found) + { + break; + } + + filepath = "../" + filepath; // Try again in parent dir + } + + if (found) + { + break; + } + + filepath.clear(); + } + + // Could not find the file + if (filepath.empty()) + { + const std::string dirList = std::accumulate(directories.begin() + 1, directories.end(), directories.front(), + [](const std::string& a, const std::string& b) { return a + "\n\t" + b; }); + std::cout << "Could not find " << filepathSuffix << " in data directories:\n\t" << dirList << std::endl; + + if (reportError) + { + std::cout << "&&&& FAILED" << std::endl; + exit(EXIT_FAILURE); + } + } + + return filepath; +} + +inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int32_t inH, int32_t inW) +{ + std::ifstream infile(fileName, std::ifstream::binary); + SAFE_ASSERT(infile.is_open() && "Attempting to read from a file that is not open."); + std::string magic, w, h, max; + infile >> magic >> w >> h >> max; + infile.seekg(1, infile.cur); + infile.read(reinterpret_cast(buffer), inH * inW); +} + namespace samplesCommon { template @@ -51,11 +153,17 @@ inline uint32_t elementSize(nvinfer1::DataType t) { switch (t) { + case nvinfer1::DataType::kINT64: return 8; case nvinfer1::DataType::kINT32: case nvinfer1::DataType::kFLOAT: return 4; - case nvinfer1::DataType::kHALF: return 2; - case nvinfer1::DataType::kINT8: return 1; - case nvinfer1::DataType::kBOOL: return 1; + case nvinfer1::DataType::kHALF: + case nvinfer1::DataType::kBF16: return 2; + case nvinfer1::DataType::kINT8: + case nvinfer1::DataType::kUINT8: + case nvinfer1::DataType::kBOOL: + case nvinfer1::DataType::kFP8: return 1; + case nvinfer1::DataType::kINT4: + SAFE_ASSERT(false && "Element size is not implemented for sub-byte data-types"); } return 0; } @@ -66,6 +174,205 @@ inline A divUp(A x, B n) return (x + n - 1) / n; } +inline int64_t volume(nvinfer1::Dims const& d) +{ + return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, std::multiplies{}); +} + +//! Return m rounded up to nearest multiple of n +template +inline T1 roundUp(T1 m, T2 n) +{ + static_assert(std::is_integral::value && std::is_integral::value, "arguments must be integers"); + static_assert(std::is_signed::value == std::is_signed::value, "mixed signedness not allowed"); + static_assert(sizeof(T1) >= sizeof(T2), "first type must be as least as wide as second type"); + return ((m + n - 1) / n) * n; +} + +//! comps is the number of components in a vector. Ignored if vecDim < 0. 
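+//! Example: volume({4, {1, 3, 224, 224}}, 1, 4, 1) rounds the channel dimension up to the
+//! vector width (3 -> 4) and returns 1 * 4 * 224 * 224 = 200704 elements.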
+inline int64_t volume(nvinfer1::Dims dims, int32_t vecDim, int32_t comps, int32_t batch) +{ + if (vecDim >= 0) + { + dims.d[vecDim] = roundUp(dims.d[vecDim], comps); + } + return samplesCommon::volume(dims) * std::max(batch, 1); +} + +inline int32_t getSMVersion() +{ +#if 0 + // Use default value for 4090 + int32_t major{8}; + int32_t minor{9}; +#else + int32_t major{}; + int32_t minor{}; + int32_t deviceIndex{}; + CHECK(cudaGetDevice(&deviceIndex)); + CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); + CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); +#endif + return ((major << 8) | minor); +} + +inline bool isSMSafe() +{ + const int32_t smVersion = getSMVersion(); + return smVersion == 0x0700 || smVersion == 0x0705 || smVersion == 0x0800 || smVersion == 0x0806 + || smVersion == 0x0807; +} + +inline int32_t calculateSoftmax(float* const prob, int32_t const numDigits) +{ + SAFE_ASSERT(prob != nullptr); + SAFE_ASSERT(numDigits == 10); + float sum{0.0F}; + std::transform(prob, prob + numDigits, prob, [&sum](float v) -> float { + sum += exp(v); + return exp(v); + }); + + SAFE_ASSERT(sum != 0.0F); + std::transform(prob, prob + numDigits, prob, [sum](float v) -> float { return v / sum; }); + int32_t idx = std::max_element(prob, prob + numDigits) - prob; + return idx; +} + +//! +//! \class TrtCudaGraphSafe +//! \brief Managed CUDA graph +//! +class TrtCudaGraphSafe +{ +public: + explicit TrtCudaGraphSafe() = default; + + TrtCudaGraphSafe(const TrtCudaGraphSafe&) = delete; + + TrtCudaGraphSafe& operator=(const TrtCudaGraphSafe&) = delete; + + TrtCudaGraphSafe(TrtCudaGraphSafe&&) = delete; + + TrtCudaGraphSafe& operator=(TrtCudaGraphSafe&&) = delete; + + ~TrtCudaGraphSafe() + { + if (mGraphExec) + { + cudaGraphExecDestroy(mGraphExec); + } + } + + void beginCapture(cudaStream_t& stream) + { + // cudaStreamCaptureModeGlobal is the only allowed mode in SAFE CUDA + CHECK(cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal)); + } + + bool launch(cudaStream_t& stream) + { + return cudaGraphLaunch(mGraphExec, stream) == cudaSuccess; + } + + void endCapture(cudaStream_t& stream) + { + CHECK(cudaStreamEndCapture(stream, &mGraph)); + CHECK(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0)); + CHECK(cudaGraphDestroy(mGraph)); + } + + void endCaptureOnError(cudaStream_t& stream) + { + // There are two possibilities why stream capture would fail: + // (1) stream is in cudaErrorStreamCaptureInvalidated state. + // (2) TRT reports a failure. + // In case (1), the returning mGraph should be nullptr. + // In case (2), the returning mGraph is not nullptr, but it should not be used. + const auto ret = cudaStreamEndCapture(stream, &mGraph); + if (ret == cudaErrorStreamCaptureInvalidated) + { + SAFE_ASSERT(mGraph == nullptr); + } + else + { + SAFE_ASSERT(ret == cudaSuccess); + SAFE_ASSERT(mGraph != nullptr); + CHECK(cudaGraphDestroy(mGraph)); + mGraph = nullptr; + } + // Clean up any CUDA error. + cudaGetLastError(); + sample::gLogError << "The CUDA graph capture on the stream has failed." 
<< std::endl; + } + +private: + cudaGraph_t mGraph{}; + cudaGraphExec_t mGraphExec{}; +}; + +inline void safeLoadLibrary(const std::string& path) +{ +#ifdef _MSC_VER + void* handle = LoadLibraryA(path.c_str()); +#else + int32_t flags{RTLD_LAZY}; + void* handle = dlopen(path.c_str(), flags); +#endif + if (handle == nullptr) + { +#ifdef _MSC_VER + sample::gLogError << "Could not load plugin library: " << path << std::endl; +#else + sample::gLogError << "Could not load plugin library: " << path << ", due to: " << dlerror() << std::endl; +#endif + } +} + +inline std::vector safeSplitString(std::string str, char delimiter = ',') +{ + std::vector splitVect; + std::stringstream ss(str); + std::string substr; + + while (ss.good()) + { + getline(ss, substr, delimiter); + splitVect.emplace_back(std::move(substr)); + } + return splitVect; +} + } // namespace samplesCommon +namespace safetyCompliance +{ +inline void initSafeCuda() +{ + // According to CUDA initialization in NVIDIA CUDA SAFETY API REFERENCE FOR DRIVE OS + // We will need to do the following in order + // 1. Initialize the calling thread with CUDA specific information (Call any CUDA RT API identified as init) + // 2. Query/Configure and choose the desired CUDA device + // 3. CUDA context initialization. (Call cudaDeviceGetLimit or cuCtxCreate) + size_t stackSizeLimit = 0; + int32_t deviceIndex = 0; + CHECK(cudaGetDevice(&deviceIndex)); + CHECK(cudaDeviceGetLimit(&stackSizeLimit, cudaLimitStackSize)); +#if IS_QNX_SAFE + CHECK(cudaSafeExSelectAPIMode(cudaSafeExAPIModeAsilB)); +#endif // IS_QNX_SAFE +} + +inline void setPromgrAbility() +{ +#if IS_QNX_SAFE + // Comply with DEEPLRN_RES_117 on QNX-safe by dropping PROCMGR_AID_MEM_PHYS ability and locking out any further + // changes + procmgr_ability( + 0, PROCMGR_ADN_NONROOT | PROCMGR_AOP_DENY | PROCMGR_AOP_LOCK | PROCMGR_AID_MEM_PHYS, PROCMGR_AID_EOL); +#endif // IS_QNX_SAFE +} + +} // namespace safetyCompliance + #endif // TENSORRT_SAFE_COMMON_H diff --git a/src/Detector/tensorrt_yolo/common/sampleConfig.h b/src/Detector/tensorrt_yolo/common/sampleConfig.h index 53a78331..801a268a 100644 --- a/src/Detector/tensorrt_yolo/common/sampleConfig.h +++ b/src/Detector/tensorrt_yolo/common/sampleConfig.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -55,9 +56,9 @@ class SampleConfig : public nvonnxparser::IOnnxConfig bool mDebugBuilder{false}; InputDataFormat mInputDataFormat{InputDataFormat::kASCII}; uint64_t mTopK{0}; - float mFailurePercentage{-1.0f}; - float mTolerance{0.0f}; - float mAbsTolerance{1e-5f}; + float mFailurePercentage{-1.0F}; + float mTolerance{0.0F}; + float mAbsTolerance{1e-5F}; public: SampleConfig() @@ -70,8 +71,7 @@ class SampleConfig : public nvonnxparser::IOnnxConfig #endif } -protected: - ~SampleConfig() + ~SampleConfig() override { #ifdef ONNX_DEBUG if (isDebug()) @@ -82,12 +82,12 @@ class SampleConfig : public nvonnxparser::IOnnxConfig } public: - void setModelDtype(const nvinfer1::DataType mdt) noexcept + void setModelDtype(const nvinfer1::DataType mdt) noexcept override { mModelDtype = mdt; } - nvinfer1::DataType getModelDtype() const noexcept + nvinfer1::DataType getModelDtype() const noexcept override { return mModelDtype; } @@ -102,28 +102,28 @@ class SampleConfig : public nvonnxparser::IOnnxConfig mTF32 = enabled; } - const char* getModelFileName() const noexcept + const char* getModelFileName() const noexcept override { return mModelFilename.c_str(); } - void setModelFileName(const char* onnxFilename) noexcept + void setModelFileName(const char* onnxFilename) noexcept override { mModelFilename = std::string(onnxFilename); } - Verbosity getVerbosityLevel() const noexcept + Verbosity getVerbosityLevel() const noexcept override { return mVerbosity; } - void addVerbosity() noexcept + void addVerbosity() noexcept override { ++mVerbosity; } - void reduceVerbosity() noexcept + void reduceVerbosity() noexcept override { --mVerbosity; } - virtual void setVerbosityLevel(Verbosity v) noexcept + void setVerbosityLevel(Verbosity v) noexcept override { mVerbosity = v; } @@ -135,19 +135,19 @@ class SampleConfig : public nvonnxparser::IOnnxConfig { mEngineFilename = std::string(engineFilename); } - const char* getTextFileName() const noexcept + const char* getTextFileName() const noexcept override { return mTextFilename.c_str(); } - void setTextFileName(const char* textFilename) noexcept + void setTextFileName(const char* textFilename) noexcept override { mTextFilename = std::string(textFilename); } - const char* getFullTextFileName() const noexcept + const char* getFullTextFileName() const noexcept override { return mFullTextFilename.c_str(); } - void setFullTextFileName(const char* fullTextFilename) noexcept + void setFullTextFileName(const char* fullTextFilename) noexcept override { mFullTextFilename = std::string(fullTextFilename); } @@ -161,12 +161,12 @@ class SampleConfig : public nvonnxparser::IOnnxConfig return mLabel; } //!< get the Label - bool getPrintLayerInfo() const noexcept + bool getPrintLayerInfo() const noexcept override { return mPrintLayercInfo; } - void setPrintLayerInfo(bool b) noexcept + void setPrintLayerInfo(bool b) noexcept override { mPrintLayercInfo = b; } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() @@ -312,7 +312,7 @@ class SampleConfig : public nvonnxparser::IOnnxConfig { return mTimingCacheFilename.c_str(); } - + void setTimingCacheFileName(const char* timingCacheFilename) noexcept { mTimingCacheFilename = std::string(timingCacheFilename); @@ -326,12 +326,6 @@ class 
SampleConfig : public nvonnxparser::IOnnxConfig return false; #endif } - - void destroy() noexcept - { - delete this; - } - }; // class SampleConfig #endif diff --git a/src/Detector/tensorrt_yolo/common/sampleDevice.cpp b/src/Detector/tensorrt_yolo/common/sampleDevice.cpp new file mode 100644 index 00000000..7964aeb5 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/sampleDevice.cpp @@ -0,0 +1,133 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sampleDevice.h" + +#include + +namespace sample +{ + +void cudaCheck(cudaError_t ret, std::ostream& err) +{ + if (ret != cudaSuccess) + { + err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl; + exit(EXIT_FAILURE); + } +} + +// Construct GPU UUID string in the same format as nvidia-smi does. +std::string getUuidString(cudaUUID_t uuid) +{ + constexpr int32_t kUUID_SIZE = sizeof(cudaUUID_t); + static_assert(kUUID_SIZE == 16, "Unexpected size for cudaUUID_t!"); + + std::ostringstream ss; + std::vector const splits = {0, 4, 6, 8, 10, kUUID_SIZE}; + + ss << "GPU" << std::hex << std::setfill('0'); + for (int32_t splitIdx = 0; splitIdx < static_cast(splits.size()) - 1; ++splitIdx) + { + ss << "-"; + for (int32_t byteIdx = splits[splitIdx]; byteIdx < splits[splitIdx + 1]; ++byteIdx) + { + ss << std::setw(2) << +static_cast(uuid.bytes[byteIdx]); + } + } + return ss.str(); +} + +void setCudaDevice(int32_t device, std::ostream& os) +{ +#if !TRT_WINML + os << "=== Device Information ===" << std::endl; + + // Get the number of visible GPUs. + int32_t nbDevices{-1}; + cudaCheck(cudaGetDeviceCount(&nbDevices)); + + if (nbDevices <= 0) + { + os << "Cannot find any available devices (GPUs)!" << std::endl; + exit(EXIT_FAILURE); + } + + // Print out the GPU name and PCIe bus ID of each GPU. + os << "Available Devices: " << std::endl; + cudaDeviceProp properties; + for (int32_t deviceIdx = 0; deviceIdx < nbDevices; ++deviceIdx) + { + cudaDeviceProp tempProperties; + cudaCheck(cudaGetDeviceProperties(&tempProperties, deviceIdx)); + + // clang-format off + os << " Device " << deviceIdx << ": \"" << tempProperties.name << "\" UUID: " + << getUuidString(tempProperties.uuid) << std::endl; + // clang-format on + + // Record the properties of the desired GPU. + if (deviceIdx == device) + { + properties = tempProperties; + } + } + + // Exit with error if the requested device ID does not exist. + if (device < 0 || device >= nbDevices) + { + os << "Cannot find device ID " << device << "!" << std::endl; + exit(EXIT_FAILURE); + } + + // Set to the corresponding GPU. + cudaCheck(cudaSetDevice(device)); + + // clang-format off + os << "Selected Device: " << properties.name << std::endl; + os << "Selected Device ID: " << device << std::endl; + os << "Selected Device UUID: " << getUuidString(properties.uuid) << std::endl; + os << "Compute Capability: " << properties.major << "." 
<< properties.minor << std::endl; + os << "SMs: " << properties.multiProcessorCount << std::endl; + os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl; + os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; + os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" + << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl; + os << "Application Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; + os << "Application Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; + os << std::endl; + os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is " + << "currently running at." << std::endl; + // clang-format on +#endif +} + +int32_t getCudaDriverVersion() +{ + int32_t version{-1}; + cudaCheck(cudaDriverGetVersion(&version)); + return version; +} + +int32_t getCudaRuntimeVersion() +{ + int32_t version{-1}; + cudaCheck(cudaRuntimeGetVersion(&version)); + return version; +} + +} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/sampleDevice.h b/src/Detector/tensorrt_yolo/common/sampleDevice.h index 2053ac7c..986dccb4 100644 --- a/src/Detector/tensorrt_yolo/common/sampleDevice.h +++ b/src/Detector/tensorrt_yolo/common/sampleDevice.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -23,17 +24,13 @@ #include #include +#include "sampleUtils.h" + namespace sample { -inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) -{ - if (ret != cudaSuccess) - { - err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl; - abort(); - } -} +//! Check if the CUDA return status shows any error. If so, exit the program immediately. +void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr); class TrtCudaEvent; @@ -238,16 +235,18 @@ class TrtCudaBuffer TrtCudaBuffer(TrtCudaBuffer&& rhs) { - reset(rhs.mPtr); + reset(rhs.mPtr, rhs.mSize); rhs.mPtr = nullptr; + rhs.mSize = 0; } TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) { if (this != &rhs) { - reset(rhs.mPtr); + reset(rhs.mPtr, rhs.mSize); rhs.mPtr = nullptr; + rhs.mSize = 0; } return *this; } @@ -260,21 +259,24 @@ class TrtCudaBuffer TrtCudaBuffer(size_t size) { A()(&mPtr, size); + mSize = size; } void allocate(size_t size) { reset(); A()(&mPtr, size); + mSize = size; } - void reset(void* ptr = nullptr) + void reset(void* ptr = nullptr, size_t size = 0) { if (mPtr) { D()(mPtr); } mPtr = ptr; + mSize = size; } void* get() const @@ -282,8 +284,14 @@ class TrtCudaBuffer return mPtr; } + size_t getSize() const + { + return mSize; + } + private: void* mPtr{nullptr}; + size_t mSize{0}; }; struct DeviceAllocator @@ -383,39 +391,39 @@ class IMirroredBuffer }; // class IMirroredBuffer //! -//! Class to have a seperate memory buffer for discrete device and host allocations. +//! 
Class to have a separate memory buffer for discrete device and host allocations. //! class DiscreteMirroredBuffer : public IMirroredBuffer { public: - void allocate(size_t size) + void allocate(size_t size) override { mSize = size; mHostBuffer.allocate(size); mDeviceBuffer.allocate(size); } - void* getDeviceBuffer() const + void* getDeviceBuffer() const override { return mDeviceBuffer.get(); } - void* getHostBuffer() const + void* getHostBuffer() const override { return mHostBuffer.get(); } - void hostToDevice(TrtCudaStream& stream) + void hostToDevice(TrtCudaStream& stream) override { cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize, cudaMemcpyHostToDevice, stream.get())); } - void deviceToHost(TrtCudaStream& stream) + void deviceToHost(TrtCudaStream& stream) override { cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize, cudaMemcpyDeviceToHost, stream.get())); } - size_t getSize() const + size_t getSize() const override { return mSize; } @@ -432,33 +440,33 @@ class DiscreteMirroredBuffer : public IMirroredBuffer class UnifiedMirroredBuffer : public IMirroredBuffer { public: - void allocate(size_t size) + void allocate(size_t size) override { mSize = size; mBuffer.allocate(size); } - void* getDeviceBuffer() const + void* getDeviceBuffer() const override { return mBuffer.get(); } - void* getHostBuffer() const + void* getHostBuffer() const override { return mBuffer.get(); } - void hostToDevice(TrtCudaStream& /*stream*/) + void hostToDevice(TrtCudaStream& stream) override { // Does nothing since we are using unified memory. } - void deviceToHost(TrtCudaStream& /*stream*/) + void deviceToHost(TrtCudaStream& stream) override { // Does nothing since we are using unified memory. } - size_t getSize() const + size_t getSize() const override { return mSize; } @@ -468,26 +476,70 @@ class UnifiedMirroredBuffer : public IMirroredBuffer TrtManagedBuffer mBuffer; }; // class UnifiedMirroredBuffer -inline void setCudaDevice(int device, std::ostream& os) +//! +//! Class to allocate memory for outputs with data-dependent shapes. The sizes of those are unknown so pre-allocation is +//! not possible. +//! +class OutputAllocator : public nvinfer1::IOutputAllocator { - cudaCheck(cudaSetDevice(device)); - - cudaDeviceProp properties; - cudaCheck(cudaGetDeviceProperties(&properties, device)); - -// clang-format off - os << "=== Device Information ===" << std::endl; - os << "Selected Device: " << properties.name << std::endl; - os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl; - os << "SMs: " << properties.multiProcessorCount << std::endl; - os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; - os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl; - os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; - os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" - << " (ECC " << (properties.ECCEnabled != 0 ? 
"enabled" : "disabled") << ")" << std::endl; - os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; - // clang-format on -} +public: + OutputAllocator(IMirroredBuffer* buffer) + : mBuffer(buffer) + { + } + + void* reallocateOutput( + char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override + { + // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr + // even for empty tensors, so allocate a dummy byte. + size = std::max(size, static_cast(1)); + if (size > mSize) + { + mBuffer->allocate(roundUp(size, alignment)); + mSize = size; + } + return mBuffer->getDeviceBuffer(); + } + + //! IMirroredBuffer does not implement Async allocation, hence this is just a wrap around + void* reallocateOutputAsync(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, + cudaStream_t /*stream*/) noexcept override + { + return reallocateOutput(tensorName, currentMemory, size, alignment); + } + + void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override + { + mFinalDims = dims; + } + + IMirroredBuffer* getBuffer() + { + return mBuffer.get(); + } + + nvinfer1::Dims getFinalDims() + { + return mFinalDims; + } + + ~OutputAllocator() override {} + +private: + std::unique_ptr mBuffer; + uint64_t mSize{}; + nvinfer1::Dims mFinalDims; +}; + +//! Set the GPU to run the inference on. +void setCudaDevice(int32_t device, std::ostream& os); + +//! Get the CUDA version of the current CUDA driver. +int32_t getCudaDriverVersion(); + +//! Get the CUDA version of the current CUDA runtime. +int32_t getCudaRuntimeVersion(); } // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/sampleEngines.cpp b/src/Detector/tensorrt_yolo/common/sampleEngines.cpp index 8bb8a8fe..dacf6f2a 100644 --- a/src/Detector/tensorrt_yolo/common/sampleEngines.cpp +++ b/src/Detector/tensorrt_yolo/common/sampleEngines.cpp @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,6 +16,7 @@ */ #include +#include #include #include #include @@ -28,17 +30,16 @@ #include "NvInfer.h" #include "NvOnnxParser.h" -#include "common.h" #include "ErrorRecorder.h" +#include "common.h" #include "half.h" #include "logger.h" +#include "sampleDevice.h" #include "sampleEngines.h" #include "sampleOptions.h" #include "sampleUtils.h" -#if !defined(_WIN32) -#include -#endif +using namespace nvinfer1; namespace sample { @@ -46,7 +47,7 @@ namespace sample namespace { -std::map readScalesFromCalibrationCache(const std::string& calibrationFile) +std::map readScalesFromCalibrationCache(std::string const& calibrationFile) { std::map tensorScales; std::ifstream cache{calibrationFile}; @@ -63,7 +64,7 @@ std::map readScalesFromCalibrationCache(const std::string& c { // Scales should be stored in calibration cache as 32-bit floating numbers encoded as 32-bit integers int32_t scalesAsInt = std::stoi(line.substr(colonPos + 2, 8), nullptr, 16); - const auto tensorName = line.substr(0, colonPos); + auto const tensorName = line.substr(0, colonPos); tensorScales[tensorName] = *reinterpret_cast(&scalesAsInt); } } @@ -72,69 +73,185 @@ std::map readScalesFromCalibrationCache(const std::string& c } } // namespace -void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, const std::vector& inputFormats, - const std::vector& outputFormats, const std::string& calibrationFile) +nvinfer1::ICudaEngine* LazilyDeserializedEngine::get() +{ + SMP_RETVAL_IF_FALSE( + !mIsSafe, "Safe mode is enabled, but trying to get standard engine!", nullptr, sample::gLogError); + + if (mEngine == nullptr) + { + SMP_RETVAL_IF_FALSE(getFileReader().isOpen() || !getBlob().empty(), "Engine is empty. Nothing to deserialize!", + nullptr, sample::gLogError); + + using time_point = std::chrono::time_point; + using duration = std::chrono::duration; + time_point const deserializeStartTime{std::chrono::high_resolution_clock::now()}; + + if (mLeanDLLPath.empty()) + { + mRuntime.reset(createRuntime()); + } + else + { + mParentRuntime.reset(createRuntime()); + ASSERT(mParentRuntime.get() != nullptr); + + mRuntime.reset(mParentRuntime->loadRuntime(mLeanDLLPath.c_str())); + } + ASSERT(mRuntime.get() != nullptr); + + if (mVersionCompatible) + { + // Application needs to opt into allowing deserialization of engines with embedded lean runtime. 
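+            // Without this opt-in the runtime refuses to execute engine-embedded host code,
+            // so deserializing a version-compatible engine would fail here.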
+ mRuntime->setEngineHostCodeAllowed(true); + } + + if (!mTempdir.empty()) + { + mRuntime->setTemporaryDirectory(mTempdir.c_str()); + } + + mRuntime->setTempfileControlFlags(mTempfileControls); + + SMP_RETVAL_IF_FALSE(mRuntime != nullptr, "runtime creation failed", nullptr, sample::gLogError); + if (mDLACore != -1) + { + mRuntime->setDLACore(mDLACore); + } + mRuntime->setErrorRecorder(&gRecorder); +#if !TRT_WINML + for (auto const& pluginPath : mDynamicPlugins) + { + mRuntime->getPluginRegistry().loadLibrary(pluginPath.c_str()); + } +#endif + + if (getFileReader().isOpen()) + { + mEngine.reset(mRuntime->deserializeCudaEngine(getFileReader())); + } + else + { + auto const& engineBlob = getBlob(); + mEngine.reset(mRuntime->deserializeCudaEngine(engineBlob.data, engineBlob.size)); + } + SMP_RETVAL_IF_FALSE(mEngine != nullptr, "Engine deserialization failed", nullptr, sample::gLogError); + + time_point const deserializeEndTime{std::chrono::high_resolution_clock::now()}; + sample::gLogInfo << "Engine deserialized in " << duration(deserializeEndTime - deserializeStartTime).count() + << " sec." << std::endl; + } + + return mEngine.get(); +} + +nvinfer1::ICudaEngine* LazilyDeserializedEngine::release() { - const auto tensorScales = readScalesFromCalibrationCache(calibrationFile); - const bool broadcastInputFormats = broadcastIOFormats(inputFormats, network.getNbInputs()); + return mEngine.release(); +} + +void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector const& inputFormats, + std::vector const& outputFormats, std::string const& calibrationFile) +{ + auto const tensorScales = readScalesFromCalibrationCache(calibrationFile); + bool const broadcastInputFormats = broadcastIOFormats(inputFormats, network.getNbInputs()); for (int32_t i = 0, n = network.getNbInputs(); i < n; ++i) { int32_t formatIdx = broadcastInputFormats ? 0 : i; - if (!inputFormats.empty() && inputFormats[formatIdx].first == nvinfer1::DataType::kINT8) + if (!inputFormats.empty() && inputFormats[formatIdx].first == DataType::kINT8) { auto* input = network.getInput(i); - const auto calibScale = tensorScales.at(input->getName()); + auto const calibScale = tensorScales.at(input->getName()); input->setDynamicRange(-127 * calibScale, 127 * calibScale); } } - const bool broadcastOutputFormats = broadcastIOFormats(outputFormats, network.getNbInputs()); + bool const broadcastOutputFormats = broadcastIOFormats(outputFormats, network.getNbOutputs()); for (int32_t i = 0, n = network.getNbOutputs(); i < n; ++i) { int32_t formatIdx = broadcastOutputFormats ? 0 : i; - if (!outputFormats.empty() && outputFormats[formatIdx].first == nvinfer1::DataType::kINT8) + if (!outputFormats.empty() && outputFormats[formatIdx].first == DataType::kINT8) { auto* output = network.getOutput(i); - const auto calibScale = tensorScales.at(output->getName()); + auto const calibScale = tensorScales.at(output->getName()); output->setDynamicRange(-127 * calibScale, 127 * calibScale); } } } -#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err) \ - { \ - if ((condition) == false) \ - { \ - (err) << (msg) << std::endl; \ - return retval; \ - } \ - } - -Parser modelToNetwork(const ModelOptions& model, nvinfer1::INetworkDefinition& network, std::ostream& err) +//! +//! \brief Generate a network definition for a given model +//! +//! \param[in] model Model options for this network +//! \param[in,out] network Network storing the parsed results +//! \param[in,out] err Error stream +//! 
\param[out] vcPluginLibrariesUsed If not nullptr, will be populated with paths to VC plugin libraries required by +//! the parsed network. +//! +//! \return Parser The parser used to initialize the network and that holds the weights for the network, or an invalid +//! parser (the returned parser converts to false if tested) +//! +//! Constant input dimensions in the model must not be changed in the corresponding +//! network definition, because its correctness may rely on the constants. +//! +//! \see Parser::operator bool() +//! +Parser modelToNetwork(ModelOptions const& model, BuildOptions const& build, nvinfer1::INetworkDefinition& network, + std::ostream& err, std::vector* vcPluginLibrariesUsed) { - sample::gLogInfo << "Start parsing network model" << std::endl; + sample::gLogInfo << "Start parsing network model." << std::endl; + auto const tBegin = std::chrono::high_resolution_clock::now(); + Parser parser; - //const std::string& modelName = model.baseModel.model; switch (model.baseModel.format) { case ModelFormat::kONNX: { using namespace nvonnxparser; - parser.onnxParser.reset(createParser(network, sample::gLogger.getTRTLogger())); + parser.onnxParser.reset(createONNXParser(network)); + ASSERT(parser.onnxParser != nullptr); +#if !TRT_WINML + // kNATIVE_INSTANCENORM is ON by default in the parser and must be cleared to use the plugin implementation. + if (build.pluginInstanceNorm) + { + parser.onnxParser->clearFlag(OnnxParserFlag::kNATIVE_INSTANCENORM); + } +#endif if (!parser.onnxParser->parseFromFile( model.baseModel.model.c_str(), static_cast(sample::gLogger.getReportableSeverity()))) { err << "Failed to parse onnx file" << std::endl; parser.onnxParser.reset(); } +#if !TRT_WINML + if (vcPluginLibrariesUsed && parser.onnxParser.get()) + { + int64_t nbPluginLibs; + char const* const* pluginLibArray = parser.onnxParser->getUsedVCPluginLibraries(nbPluginLibs); + if (nbPluginLibs >= 0) + { + vcPluginLibrariesUsed->reserve(nbPluginLibs); + for (int64_t i = 0; i < nbPluginLibs; ++i) + { + sample::gLogInfo << "Using VC plugin library " << pluginLibArray[i] << std::endl; + vcPluginLibrariesUsed->emplace_back(std::string{pluginLibArray[i]}); + } + } + else + { + sample::gLogWarning << "Failure to query VC plugin libraries required by parsed ONNX network" + << std::endl; + } + } +#endif break; } - case ModelFormat::kANY: - break; - default: - break; + case ModelFormat::kANY: break; } - sample::gLogInfo << "Finish parsing network model" << std::endl; + auto const tEnd = std::chrono::high_resolution_clock::now(); + float const parseTime = std::chrono::duration(tEnd - tBegin).count(); + + sample::gLogInfo << "Finished parsing network model. 
Parse time: " << parseTime << std::endl; return parser; } @@ -144,10 +261,10 @@ namespace class RndInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 { public: - RndInt8Calibrator(int batches, std::vector& elemCount, const std::string& cacheFile, - const nvinfer1::INetworkDefinition& network, std::ostream& err); + RndInt8Calibrator(int32_t batches, std::vector& elemCount, std::string const& cacheFile, + nvinfer1::INetworkDefinition const& network, std::ostream& err); - ~RndInt8Calibrator() + ~RndInt8Calibrator() override { for (auto& elem : mInputDeviceBuffers) { @@ -155,28 +272,28 @@ class RndInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 } } - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; + bool getBatch(void* bindings[], char const* names[], int32_t nbBindings) noexcept override; - int getBatchSize() const noexcept override + int32_t getBatchSize() const noexcept override { return 1; } const void* readCalibrationCache(size_t& length) noexcept override; - virtual void writeCalibrationCache(const void*, size_t) noexcept override {} + void writeCalibrationCache(void const*, size_t) noexcept override {} private: - int mBatches{}; - int mCurrentBatch{}; + int32_t mBatches{}; + int32_t mCurrentBatch{}; std::string mCacheFile; std::map mInputDeviceBuffers; std::vector mCalibrationCache; std::ostream& mErr; }; -RndInt8Calibrator::RndInt8Calibrator(int batches, std::vector& elemCount, const std::string& cacheFile, - const nvinfer1::INetworkDefinition& network, std::ostream& err) +RndInt8Calibrator::RndInt8Calibrator(int32_t batches, std::vector& elemCount, std::string const& cacheFile, + INetworkDefinition const& network, std::ostream& err) : mBatches(batches) , mCurrentBatch(0) , mCacheFile(cacheFile) @@ -192,7 +309,7 @@ RndInt8Calibrator::RndInt8Calibrator(int batches, std::vector& elemCoun std::uniform_real_distribution distribution(-1.0F, 1.0F); auto gen = [&generator, &distribution]() { return distribution(generator); }; - for (int i = 0; i < network.getNbInputs(); i++) + for (int32_t i = 0; i < network.getNbInputs(); i++) { auto* input = network.getInput(i); std::vector rnd_data(elemCount[i]); @@ -206,14 +323,14 @@ RndInt8Calibrator::RndInt8Calibrator(int batches, std::vector& elemCoun } } -bool RndInt8Calibrator::getBatch(void* bindings[], const char* names[], int nbBindings) noexcept +bool RndInt8Calibrator::getBatch(void* bindings[], char const* names[], int32_t nbBindings) noexcept { if (mCurrentBatch >= mBatches) { return false; } - for (int i = 0; i < nbBindings; ++i) + for (int32_t i = 0; i < nbBindings; ++i) { bindings[i] = mInputDeviceBuffers[names[i]]; } @@ -238,35 +355,35 @@ const void* RndInt8Calibrator::readCalibrationCache(size_t& length) noexcept return !mCalibrationCache.empty() ? mCalibrationCache.data() : nullptr; } -bool setTensorDynamicRange(const nvinfer1::INetworkDefinition& network, float inRange = 2.0F, float outRange = 4.0F) +bool setTensorDynamicRange(INetworkDefinition const& network, float inRange = 2.0F, float outRange = 4.0F) { // Ensure that all layer inputs have a dynamic range. - for (int l = 0; l < network.getNbLayers(); l++) + for (int32_t l = 0; l < network.getNbLayers(); l++) { auto* layer = network.getLayer(l); - for (int i = 0; i < layer->getNbInputs(); i++) + for (int32_t i = 0; i < layer->getNbInputs(); i++) { - nvinfer1::ITensor* input{layer->getInput(i)}; + ITensor* input{layer->getInput(i)}; // Optional inputs are nullptr here and are from RNN layers. 
if (input && !input->dynamicRangeIsSet()) { // Concat should propagate dynamic range from outputs to inputs to avoid // Re-quantization during the concatenation - auto dynRange = (layer->getType() == nvinfer1::LayerType::kCONCATENATION) ? outRange : inRange; + auto dynRange = (layer->getType() == LayerType::kCONCATENATION) ? outRange : inRange; if (!input->setDynamicRange(-dynRange, dynRange)) { return false; } } } - for (int o = 0; o < layer->getNbOutputs(); o++) + for (int32_t o = 0; o < layer->getNbOutputs(); o++) { - nvinfer1::ITensor* output{layer->getOutput(o)}; + ITensor* output{layer->getOutput(o)}; // Optional outputs are nullptr here and are from RNN layers. if (output && !output->dynamicRangeIsSet()) { // Pooling must have the same input and output dynamic range. - if (layer->getType() == nvinfer1::LayerType::kPOOLING) + if (layer->getType() == LayerType::kPOOLING) { if (!output->setDynamicRange(-inRange, inRange)) { @@ -286,319 +403,43 @@ bool setTensorDynamicRange(const nvinfer1::INetworkDefinition& network, float in return true; } -// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0. -template -void sparsify(const T* values, int64_t count, int32_t k, int32_t rs, std::vector& sparseWeights) -{ - const auto c = count / (k * rs); - sparseWeights.resize(count * sizeof(T)); - auto* sparseValues = reinterpret_cast(sparseWeights.data()); - - constexpr int32_t window = 4; - constexpr int32_t nonzeros = 2; - - const int32_t crs = c * rs; - const auto getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { return ki * crs + ci * rs + rsi; }; - - for (int64_t ki = 0; ki < k; ++ki) - { - for (int64_t rsi = 0; rsi < rs; ++rsi) - { - int32_t w = 0; - int32_t nz = 0; - for (int64_t ci = 0; ci < c; ++ci) - { - const auto index = getIndex(ki, ci, rsi); - if (nz < nonzeros) - { - sparseValues[index] = values[index]; - ++nz; - } - else - { - sparseValues[index] = 0; - } - if (++w == window) - { - w = 0; - nz = 0; - } - } - } - } -} - -void sparsify(const nvinfer1::Weights& weights, int32_t k, int32_t rs, std::vector& sparseWeights) -{ - switch (weights.type) - { - case nvinfer1::DataType::kFLOAT: - sparsify(static_cast(weights.values), weights.count, k, rs, sparseWeights); - break; - case nvinfer1::DataType::kHALF: - sparsify(static_cast(weights.values), weights.count, k, rs, sparseWeights); - break; - case nvinfer1::DataType::kINT8: - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kBOOL: break; - } -} - -template -void setSparseWeights(L& l, int32_t k, int32_t rs, std::vector& sparseWeights) +bool isNonActivationType(nvinfer1::DataType const type) { - auto weights = l.getKernelWeights(); - sparsify(weights, k, rs, sparseWeights); - weights.values = sparseWeights.data(); - l.setKernelWeights(weights); + return type == nvinfer1::DataType::kINT32 || type == nvinfer1::DataType::kINT64 || type == nvinfer1::DataType::kBOOL + || type == nvinfer1::DataType::kUINT8; } -template -void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n) +void setLayerPrecisions(INetworkDefinition& network, LayerPrecisions const& layerPrecisions) { - ASSERT(dst != src); - T* tdst = reinterpret_cast(dst); - T const* tsrc = reinterpret_cast(src); - for (int32_t mi = 0; mi < m; ++mi) - { - for (int32_t ni = 0; ni < n; ++ni) - { - int32_t const isrc = mi * n + ni; - int32_t const idst = ni * m + mi; - tdst[idst] = tsrc[isrc]; - } - } -} - -// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle layers. 
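// (These helpers enforce the 2:4 structured-sparsity pattern -- at most two non-zero
// values in every window of four along the channel axis -- which is the weight layout
// the kSPARSE_WEIGHTS tactics operate on.)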
-// Forward analysis on the API graph to determine which weights to sparsify. -void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights) -{ - using TensorToLayer = std::unordered_map; - using LayerToTensor = std::unordered_map; - - // 1. Collect layers and tensors information from the network. - TensorToLayer matmulI2L; - TensorToLayer constO2L; - TensorToLayer shuffleI2L; - LayerToTensor shuffleL2O; - auto collectMappingInfo = [&](int32_t const idx) { - nvinfer1::ILayer* l = network.getLayer(idx); - switch (l->getType()) - { - case nvinfer1::LayerType::kMATRIX_MULTIPLY: - { - // assume weights on the second input. - matmulI2L.insert({l->getInput(1), l}); - break; - } - case nvinfer1::LayerType::kCONSTANT: - { - nvinfer1::DataType const dtype = static_cast(l)->getWeights().type; - if (dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF) - { - // Sparsify float only. - constO2L.insert({l->getOutput(0), l}); - } - break; - } - case nvinfer1::LayerType::kSHUFFLE: - { - shuffleI2L.insert({l->getInput(0), l}); - shuffleL2O.insert({l, l->getOutput(0)}); - break; - } - default: break; - } - }; - int32_t const nbLayers = network.getNbLayers(); - for (int32_t i = 0; i < nbLayers; ++i) - { - collectMappingInfo(i); - } - if (matmulI2L.size() == 0 || constO2L.size() == 0) - { - // No MatrixMultiply or Constant layer found, no weights to sparsify. - return; - } - - // Helper for analysis - auto isTranspose = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); }; - auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; }; - auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool { - for (int32_t i = 0; i < dims.nbDims; ++i) - { - if (dims.d[i] != i || dims.d[i] != -1) - { - return false; - } - } - return true; - }; - auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) - { - while (shuffleI2L.find(t) != shuffleI2L.end()) - { - nvinfer1::IShuffleLayer* s = static_cast(shuffleI2L.at(t)); - if (!is2D(s->getInput(0)->getDimensions()) || !is2D(s->getReshapeDimensions()) - || !isIdenticalReshape(s->getReshapeDimensions())) - { - break; - } - - if (isTranspose(s->getFirstTranspose())) - needTranspose = !needTranspose; - if (isTranspose(s->getSecondTranspose())) - needTranspose = !needTranspose; - - t = shuffleL2O.at(s); - } - return t; - }; - - // 2. Forward analysis to collect the Constant layers connected to MatMul via Transpose - std::unordered_map constantLayerToSparse; - for (auto& o2l : constO2L) - { - // If need to transpose the weights of the Constant layer. - // Need to transpose by default due to semantic difference. - bool needTranspose{true}; - nvinfer1::ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose); - if (matmulI2L.find(t) == matmulI2L.end()) - { - continue; - } - - // check MatMul params... - nvinfer1::IMatrixMultiplyLayer* mm = static_cast(matmulI2L.at(t)); - bool const twoInputs = mm->getNbInputs() == 2; - bool const all2D = is2D(mm->getInput(0)->getDimensions()) && is2D(mm->getInput(1)->getDimensions()); - bool const isSimple - = mm->getOperation(0) == nvinfer1::MatrixOperation::kNONE && mm->getOperation(1) != nvinfer1::MatrixOperation::kVECTOR; - if (!(twoInputs && all2D && isSimple)) - continue; - - if (mm->getOperation(1) == nvinfer1::MatrixOperation::kTRANSPOSE) - needTranspose = !needTranspose; - - constantLayerToSparse.insert({static_cast(o2l.second), needTranspose}); - } - - // 3. 
Finally, sparsify the weights - auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) - { - nvinfer1::Dims dims = layer->getOutput(0)->getDimensions(); - ASSERT(dims.nbDims == 2); - int32_t const idxN = needTranspose ? 1 : 0; - int32_t const n = dims.d[idxN]; - int32_t const k = dims.d[1 - idxN]; - sparseWeights.emplace_back(); - std::vector& spw = sparseWeights.back(); - nvinfer1::Weights w = layer->getWeights(); - nvinfer1::DataType const dtype = w.type; - ASSERT(dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF); // non-float weights should have been ignored. - - if (needTranspose) - { - if (dtype == nvinfer1::DataType::kFLOAT) - { - spw.resize(w.count * sizeof(float)); - transpose2DWeights(spw.data(), w.values, k, n); - } - else if (dtype == nvinfer1::DataType::kHALF) - { - spw.resize(w.count * sizeof(half_float::half)); - transpose2DWeights(spw.data(), w.values, k, n); - } - - w.values = spw.data(); - std::vector tmpW; - sparsify(w, n, 1, tmpW); - - if (dtype == nvinfer1::DataType::kFLOAT) - transpose2DWeights(spw.data(), tmpW.data(), n, k); - else if (dtype == nvinfer1::DataType::kHALF) - transpose2DWeights(spw.data(), tmpW.data(), n, k); - } - else - { - sparsify(w, n, 1, spw); - } - - w.values = spw.data(); - layer->setWeights(w); - }; - for (auto& l : constantLayerToSparse) - { - sparsifyConstantWeights(l.first, l.second); - } -} - -void sparsify(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights) -{ - for (int32_t l = 0; l < network.getNbLayers(); ++l) - { - auto* layer = network.getLayer(l); - const auto t = layer->getType(); - if (t == nvinfer1::LayerType::kCONVOLUTION) - { - auto& conv = *static_cast(layer); - const auto& dims = conv.getKernelSizeNd(); - if (dims.nbDims > 2) - { - continue; - } - const auto k = conv.getNbOutputMaps(); - const auto rs = dims.d[0] * dims.d[1]; - sparseWeights.emplace_back(); - setSparseWeights(conv, k, rs, sparseWeights.back()); - } - else if (t == nvinfer1::LayerType::kFULLY_CONNECTED) - { - auto& fc = *static_cast(layer); - const auto k = fc.getNbOutputChannels(); - sparseWeights.emplace_back(); - setSparseWeights(fc, k, 1, sparseWeights.back()); - } - } - - sparsifyMatMulKernelWeights(network, sparseWeights); -} - -void setLayerPrecisions(nvinfer1::INetworkDefinition& network, LayerPrecisions const& layerPrecisions) -{ - bool const hasGlobalPrecision{layerPrecisions.find("*") != layerPrecisions.end()}; - auto const globalPrecision = hasGlobalPrecision ? layerPrecisions.at("*") : nvinfer1::DataType::kFLOAT; bool hasLayerPrecisionSkipped{false}; for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) { auto* layer = network.getLayer(layerIdx); auto const layerName = layer->getName(); - if (layerPrecisions.find(layer->getName()) != layerPrecisions.end()) + auto exactMatch = layerPrecisions.find(layerName); + auto plausibleMatch = findPlausible(layerPrecisions, layerName); + if (exactMatch != layerPrecisions.end()) { - layer->setPrecision(layerPrecisions.at(layer->getName())); + sample::gLogInfo << "Set layer " << layerName << " to precision " << exactMatch->second << std::endl; + layer->setPrecision(exactMatch->second); } - else if (hasGlobalPrecision) + else if (plausibleMatch != layerPrecisions.end()) { - // We should not set the layer precision if its default precision is INT32 or Bool. 
- if (layer->getPrecision() == nvinfer1::DataType::kINT32 - || layer->getPrecision() == nvinfer1::DataType::kBOOL) + if (isNonActivationType(layer->getPrecision())) { hasLayerPrecisionSkipped = true; sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because the " - << " default layer precision is INT32 or Bool." << std::endl; + << " default layer precision is of non-activation type." << std::endl; continue; } - // We should not set the constant layer precision if its weights are in INT32. if (layer->getType() == nvinfer1::LayerType::kCONSTANT - && static_cast(layer)->getWeights().type == nvinfer1::DataType::kINT32) + && (isNonActivationType(static_cast(layer)->getWeights().type))) { hasLayerPrecisionSkipped = true; sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this " - << "constant layer has INT32 weights." << std::endl; + << "constant layer has weights of non-activation type." << std::endl; continue; } - // We should not set the layer precision if the layer operates on a shape tensor. if (layer->getNbInputs() >= 1 && layer->getInput(0)->isShapeTensor()) { hasLayerPrecisionSkipped = true; @@ -606,18 +447,17 @@ void setLayerPrecisions(nvinfer1::INetworkDefinition& network, LayerPrecisions c << "operates on a shape tensor." << std::endl; continue; } - if ((layer->getType() == nvinfer1::LayerType::kIDENTITY - || layer->getType() == nvinfer1::LayerType::kSHUFFLE) - && layer->getNbInputs() >= 1 && layer->getInput(0)->getType() == nvinfer1::DataType::kINT32 - && layer->getNbOutputs() >= 1 && layer->getOutput(0)->getType() == nvinfer1::DataType::kINT32) + if (layer->getNbInputs() >= 1 && isNonActivationType(layer->getInput(0)->getType()) + && layer->getNbOutputs() >= 1 && isNonActivationType(layer->getOutput(0)->getType())) { hasLayerPrecisionSkipped = true; sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this " - << "layer has INT32 input and output." << std::endl; + << "layer has input and output of non-activation type." << std::endl; continue; } // All heuristics passed. Set the layer precision. - layer->setPrecision(globalPrecision); + sample::gLogInfo << "Set layer " << layerName << " to precision " << plausibleMatch->second << std::endl; + layer->setPrecision(plausibleMatch->second); } } @@ -628,7 +468,7 @@ void setLayerPrecisions(nvinfer1::INetworkDefinition& network, LayerPrecisions c } } -void setLayerOutputTypes(nvinfer1::INetworkDefinition& network, LayerOutputTypes const& layerOutputTypes) +void setLayerOutputTypes(INetworkDefinition& network, LayerOutputTypes const& layerOutputTypes) { bool const hasGlobalOutputType{layerOutputTypes.find("*") != layerOutputTypes.end()}; auto const globalOutputType = hasGlobalOutputType ? 
layerOutputTypes.at("*").at(0) : nvinfer1::DataType::kFLOAT; @@ -638,9 +478,11 @@ void setLayerOutputTypes(nvinfer1::INetworkDefinition& network, LayerOutputTypes auto* layer = network.getLayer(layerIdx); auto const layerName = layer->getName(); auto const nbOutputs = layer->getNbOutputs(); - if (layerOutputTypes.find(layer->getName()) != layerOutputTypes.end()) + auto exactMatch = layerOutputTypes.find(layerName); + auto plausibleMatch = findPlausible(layerOutputTypes, layerName); + if (exactMatch != layerOutputTypes.end()) { - auto const& outputTypes = layerOutputTypes.at(layer->getName()); + auto const& outputTypes = exactMatch->second; bool const isBroadcast = (outputTypes.size() == 1); if (!isBroadcast && static_cast(outputTypes.size()) != nbOutputs) { @@ -651,11 +493,17 @@ void setLayerOutputTypes(nvinfer1::INetworkDefinition& network, LayerOutputTypes } for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) { - layer->setOutputType(outputIdx, outputTypes.at(isBroadcast ? 0 : outputIdx)); + auto const outputType = outputTypes.at(isBroadcast ? 0 : outputIdx); + sample::gLogInfo << "Set output " << outputIdx << " of layer " << layerName << " to type " << outputType + << std::endl; + layer->setOutputType(outputIdx, outputType); } } - else if (hasGlobalOutputType) + else if (plausibleMatch != layerOutputTypes.end()) { + auto const& outputTypes = plausibleMatch->second; + bool const isBroadcast = (outputTypes.size() == 1); + // We should not set the layer output types if its default precision is INT32 or Bool. if (layer->getPrecision() == nvinfer1::DataType::kINT32 || layer->getPrecision() == nvinfer1::DataType::kBOOL) @@ -667,7 +515,7 @@ void setLayerOutputTypes(nvinfer1::INetworkDefinition& network, LayerOutputTypes } // We should not set the constant layer output types if its weights are in INT32. if (layer->getType() == nvinfer1::LayerType::kCONSTANT - && static_cast(layer)->getWeights().type == nvinfer1::DataType::kINT32) + && static_cast(layer)->getWeights().type == nvinfer1::DataType::kINT32) { hasLayerOutputTypeSkipped = true; sample::gLogVerbose << "Skipped setting output types for layer " << layerName << " because this " @@ -684,6 +532,10 @@ void setLayerOutputTypes(nvinfer1::INetworkDefinition& network, LayerOutputTypes << layerName << " because it is a shape tensor." << std::endl; continue; } + + auto const outputType = outputTypes.at(isBroadcast ? 
0 : outputIdx); + sample::gLogInfo << "Set output " << outputIdx << " of layer " << layerName << " to type " << outputType + << std::endl; layer->setOutputType(outputIdx, globalOutputType); } } @@ -696,45 +548,129 @@ void setLayerOutputTypes(nvinfer1::INetworkDefinition& network, LayerOutputTypes } } -void setMemoryPoolLimits(nvinfer1::IBuilderConfig& config, BuildOptions const& build) +void setLayerDeviceTypes( + INetworkDefinition const& network, IBuilderConfig& config, LayerDeviceTypes const& layerDeviceTypes) { - auto const roundToBytes = [](double const sizeInMB) { return static_cast(sizeInMB * (1 << 20)); }; + for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) + { + auto* layer = network.getLayer(layerIdx); + auto const layerName = layer->getName(); + auto match = findPlausible(layerDeviceTypes, layerName); + if (match != layerDeviceTypes.end()) + { + DeviceType const deviceType = match->second; + sample::gLogInfo << "Set layer " << layerName << " to device type " << (int)deviceType << std::endl; + config.setDeviceType(layer, deviceType); + } + } +} + +void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors) +{ + for (int64_t inputIndex = 0; inputIndex < network.getNbInputs(); ++inputIndex) + { + auto* t = network.getInput(inputIndex); + auto const tensorName = t->getName(); + if (debugTensors.count(tensorName) > 0) + { + network.markDebug(*t); + } + } + for (int64_t layerIndex = 0; layerIndex < network.getNbLayers(); ++layerIndex) + { + auto* layer = network.getLayer(layerIndex); + for (int64_t outputIndex = 0; outputIndex < layer->getNbOutputs(); ++outputIndex) + { + auto* t = layer->getOutput(outputIndex); + auto const tensorName = t->getName(); + if (debugTensors.count(tensorName) > 0) + { + network.markDebug(*t); + } + } + } +} + +void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) +{ + auto const roundToBytes = [](double const size, bool fromMB = true) { + return static_cast(size * (fromMB ? 1.0_MiB : 1.0_KiB)); + }; if (build.workspace >= 0) - config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, roundToBytes(build.workspace)); + { + config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, roundToBytes(build.workspace)); + } if (build.dlaSRAM >= 0) - config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, roundToBytes(build.dlaSRAM)); + { + size_t const sizeInBytes = roundToBytes(build.dlaSRAM); + size_t sizeInPowerOf2{1}; + // Using 2^30 bytes as a loose upper bound to prevent the possibility of overflows and infinite loops. + while (sizeInPowerOf2 < 31 && (static_cast(1) << sizeInPowerOf2) <= sizeInBytes) + { + ++sizeInPowerOf2; + } + --sizeInPowerOf2; + if (sizeInPowerOf2 == 30) + { + sample::gLogWarning + << "User-specified DLA managed SRAM size is too large and has been clipped to 2^30 bytes. " + << "Please make sure that this is the intended managed SRAM size." 
<< std::endl; + } + config.setMemoryPoolLimit(MemoryPoolType::kDLA_MANAGED_SRAM, static_cast(1) << sizeInPowerOf2); + } if (build.dlaLocalDRAM >= 0) - config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, roundToBytes(build.dlaLocalDRAM)); + { + config.setMemoryPoolLimit(MemoryPoolType::kDLA_LOCAL_DRAM, roundToBytes(build.dlaLocalDRAM)); + } if (build.dlaGlobalDRAM >= 0) - config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, roundToBytes(build.dlaGlobalDRAM)); + { + config.setMemoryPoolLimit(MemoryPoolType::kDLA_GLOBAL_DRAM, roundToBytes(build.dlaGlobalDRAM)); + } + if (build.tacticSharedMem >= 0) + { + config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, roundToBytes(build.tacticSharedMem, false)); + } +} + +void setPreviewFeatures(IBuilderConfig& config, BuildOptions const& build) +{ + auto const setFlag = [&](PreviewFeature feat) { + int32_t featVal = static_cast(feat); + if (build.previewFeatures.find(featVal) != build.previewFeatures.end()) + { + config.setPreviewFeature(feat, build.previewFeatures.at(featVal)); + } + }; + setFlag(PreviewFeature::kALIASED_PLUGIN_IO_10_03); } } // namespace -bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - nvinfer1::INetworkDefinition& network, nvinfer1::IBuilderConfig& config, std::ostream& err, - std::vector>& sparseWeights) +bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, IBuilder& builder, + INetworkDefinition& network, IBuilderConfig& config, std::unique_ptr& calibrator, + std::ostream& err, std::vector>& sparseWeights) { - nvinfer1::IOptimizationProfile* profile{nullptr}; - if (build.maxBatch) - builder.setMaxBatchSize(build.maxBatch); - else + std::vector profiles{}; + profiles.resize(build.optProfiles.size()); + for (auto& profile : profiles) + { profile = builder.createOptimizationProfile(); + } bool hasDynamicShapes{false}; bool broadcastInputFormats = broadcastIOFormats(build.inputFormats, network.getNbInputs()); - if (profile) + // Check if the provided input tensor names match the input tensors of the engine. + // Throw an error if the provided input tensor names cannot be found because it implies a potential typo. + for (auto const& shapes : build.optProfiles) { - // Check if the provided input tensor names match the input tensors of the engine. - // Throw an error if the provided input tensor names cannot be found because it implies a potential typo. - for (const auto& shape : build.shapes) + for (auto const& shape : shapes) { bool tensorNameFound{false}; for (int32_t i = 0; i < network.getNbInputs(); ++i) { - if (network.getInput(i)->getName() == shape.first) + if (matchStringWithOneWildcard(shape.first, network.getInput(i)->getName())) { tensorNameFound = true; break; @@ -755,45 +691,31 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, auto* input = network.getInput(i); if (!build.inputFormats.empty()) { - int inputFormatIndex = broadcastInputFormats ? 0 : i; + int32_t inputFormatIndex = broadcastInputFormats ? 0 : i; input->setType(build.inputFormats[inputFormatIndex].first); input->setAllowedFormats(build.inputFormats[inputFormatIndex].second); } - else - { - switch (input->getType()) - { - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kBOOL: - case nvinfer1::DataType::kHALF: - // Leave these as is. - break; - case nvinfer1::DataType::kFLOAT: - case nvinfer1::DataType::kINT8: - // User did not specify a floating-point format. Default to kFLOAT. 
- input->setType(nvinfer1::DataType::kFLOAT); - break; - } - input->setAllowedFormats(1U << static_cast(nvinfer1::TensorFormat::kLINEAR)); - } - if (profile) + auto const dims = input->getDimensions(); + auto const isScalar = dims.nbDims == 0; + auto const isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }) + || input->isShapeTensor(); + if (isDynamicInput) { - auto const dims = input->getDimensions(); - auto const isScalar = dims.nbDims == 0; - auto const isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }) - || input->isShapeTensor(); - if (isDynamicInput) + hasDynamicShapes = true; + for (size_t i = 0; i < build.optProfiles.size(); i++) { - hasDynamicShapes = true; - auto shape = build.shapes.find(input->getName()); + auto const& optShapes = build.optProfiles[i]; + auto profile = profiles[i]; + auto const tensorName = input->getName(); + auto shape = findPlausible(optShapes, tensorName); ShapeRange shapes{}; // If no shape is provided, set dynamic dimensions to 1. - if (shape == build.shapes.end()) + if (shape == optShapes.end()) { - constexpr int DEFAULT_DIMENSION = 1; - std::vector staticDims; + constexpr int32_t kDEFAULT_DIMENSION{1}; + std::vector staticDims; if (input->isShapeTensor()) { if (isScalar) @@ -803,16 +725,16 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, else { staticDims.resize(dims.d[0]); - std::fill(staticDims.begin(), staticDims.end(), DEFAULT_DIMENSION); + std::fill(staticDims.begin(), staticDims.end(), kDEFAULT_DIMENSION); } } else { staticDims.resize(dims.nbDims); std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), - [&](int dimension) { return dimension > 0 ? dimension : DEFAULT_DIMENSION; }); + [&](int dimension) { return dimension > 0 ? dimension : kDEFAULT_DIMENSION; }); } - sample::gLogWarning << "Dynamic dimensions required for input: " << input->getName() + sample::gLogWarning << "Dynamic dimensions required for input: " << tensorName << ", but no shapes were provided. 
Automatically overriding shape to: " << staticDims << std::endl; std::fill(shapes.begin(), shapes.end(), staticDims); @@ -825,39 +747,62 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, std::vector profileDims{}; if (input->isShapeTensor()) { - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kMIN)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues(input->getName(), nvinfer1::OptProfileSelector::kMIN, + profileDims = shapes[static_cast(OptProfileSelector::kMIN)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues(tensorName, OptProfileSelector::kMIN, profileDims.data(), static_cast(profileDims.size())), "Error in set shape values MIN", false, err); - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kOPT)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues(input->getName(), nvinfer1::OptProfileSelector::kOPT, + profileDims = shapes[static_cast(OptProfileSelector::kOPT)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues(tensorName, OptProfileSelector::kOPT, profileDims.data(), static_cast(profileDims.size())), "Error in set shape values OPT", false, err); - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kMAX)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues(input->getName(), nvinfer1::OptProfileSelector::kMAX, + profileDims = shapes[static_cast(OptProfileSelector::kMAX)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues(tensorName, OptProfileSelector::kMAX, profileDims.data(), static_cast(profileDims.size())), "Error in set shape values MAX", false, err); + sample::gLogInfo << "Set input shape tensor " << tensorName << " for optimization profile " << i + << " to:" + << " MIN=" << shapes[static_cast(OptProfileSelector::kMIN)] + << " OPT=" << shapes[static_cast(OptProfileSelector::kOPT)] + << " MAX=" << shapes[static_cast(OptProfileSelector::kMAX)] << std::endl; } else { - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kMIN)]; + profileDims = shapes[static_cast(OptProfileSelector::kMIN)]; SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, toDims(profileDims)), + profile->setDimensions(tensorName, OptProfileSelector::kMIN, toDims(profileDims)), "Error in set dimensions to profile MIN", false, err); - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kOPT)]; + profileDims = shapes[static_cast(OptProfileSelector::kOPT)]; SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, toDims(profileDims)), + profile->setDimensions(tensorName, OptProfileSelector::kOPT, toDims(profileDims)), "Error in set dimensions to profile OPT", false, err); - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kMAX)]; + profileDims = shapes[static_cast(OptProfileSelector::kMAX)]; SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, toDims(profileDims)), + profile->setDimensions(tensorName, OptProfileSelector::kMAX, toDims(profileDims)), "Error in set dimensions to profile MAX", false, err); + sample::gLogInfo << "Set shape of input tensor " << tensorName << " for optimization profile " << i + << " to:" + << " MIN=" << shapes[static_cast(OptProfileSelector::kMIN)] + << " OPT=" << shapes[static_cast(OptProfileSelector::kOPT)] + << " MAX=" << shapes[static_cast(OptProfileSelector::kMAX)] << std::endl; } } } } - if (!hasDynamicShapes && !build.shapes.empty()) + for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++) + { + auto* output = network.getOutput(i); + auto const dims = 
output->getDimensions(); + // A shape tensor output with known static dimensions may have dynamic shape values inside it. + auto const isDynamicOutput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }) + || output->isShapeTensor(); + if (isDynamicOutput) + { + hasDynamicShapes = true; + } + } + + if (!hasDynamicShapes && !build.optProfiles[0].empty()) { sample::gLogError << "Static model does not take explicit shapes since the shape of inference tensors will be " "determined by the model itself" @@ -865,10 +810,14 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, return false; } - if (profile && hasDynamicShapes) + if (hasDynamicShapes) { - SMP_RETVAL_IF_FALSE(profile->isValid(), "Required optimization profile is invalid", false, err); - SMP_RETVAL_IF_FALSE(config.addOptimizationProfile(profile) != -1, "Error in add optimization profile", false, err); + for (auto profile : profiles) + { + SMP_RETVAL_IF_FALSE(profile->isValid(), "Required optimization profile is invalid", false, err); + SMP_RETVAL_IF_FALSE( + config.addOptimizationProfile(profile) != -1, "Error in add optimization profile", false, err); + } } bool broadcastOutputFormats = broadcastIOFormats(build.outputFormats, network.getNbOutputs(), false); @@ -879,43 +828,118 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, auto* output = network.getOutput(i); if (!build.outputFormats.empty()) { - int outputFormatIndex = broadcastOutputFormats ? 0 : i; + int32_t outputFormatIndex = broadcastOutputFormats ? 0 : i; output->setType(build.outputFormats[outputFormatIndex].first); output->setAllowedFormats(build.outputFormats[outputFormatIndex].second); } - else - { - output->setAllowedFormats(1U << static_cast(nvinfer1::TensorFormat::kLINEAR)); - } } setMemoryPoolLimits(config, build); + setPreviewFeatures(config, build); + + if (build.builderOptimizationLevel != defaultBuilderOptimizationLevel) + { + config.setBuilderOptimizationLevel(build.builderOptimizationLevel); + } + + if (build.maxTactics != defaultMaxTactics) + { +#if (NV_TENSORRT_MAJOR < 9) + config.setMaxNbTactics(build.maxTactics); +#else + config.setTacticSources(build.maxTactics); +#endif + } + if (build.timingCacheMode == TimingCacheMode::kDISABLE) - config.setFlag(nvinfer1::BuilderFlag::kDISABLE_TIMING_CACHE); + { + config.setFlag(BuilderFlag::kDISABLE_TIMING_CACHE); + } + + if (build.disableCompilationCache) + { + config.setFlag(BuilderFlag::kDISABLE_COMPILATION_CACHE); + } + + if (build.errorOnTimingCacheMiss) + { + config.setFlag(BuilderFlag::kERROR_ON_TIMING_CACHE_MISS); + } if (!build.tf32) - config.clearFlag(nvinfer1::BuilderFlag::kTF32); + { + config.clearFlag(BuilderFlag::kTF32); + } if (build.refittable) - config.setFlag(nvinfer1::BuilderFlag::kREFIT); + { + config.setFlag(BuilderFlag::kREFIT); + } + + if (build.stripWeights) + { + // The kREFIT_IDENTICAL is enabled by default when kSTRIP_PLAN is on. 
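// Stripping removes the weight values from the serialized plan, which makes the file much
// smaller; at deployment the weights must be supplied again through the refit API (hence
// the implied kREFIT_IDENTICAL), typically from the same model the plan was built from.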
+ config.setFlag(BuilderFlag::kSTRIP_PLAN); + } + + if (build.versionCompatible) + { + config.setFlag(BuilderFlag::kVERSION_COMPATIBLE); + } +#if !TRT_WINML + std::vector pluginPaths; + for (auto const& pluginPath : sys.setPluginsToSerialize) + { + sample::gLogVerbose << "Setting plugin to serialize: " << pluginPath << std::endl; + pluginPaths.push_back(pluginPath.c_str()); + } + if (!pluginPaths.empty()) + { + config.setPluginsToSerialize(pluginPaths.data(), pluginPaths.size()); + } +#endif + if (build.excludeLeanRuntime) + { + config.setFlag(BuilderFlag::kEXCLUDE_LEAN_RUNTIME); + } if (build.sparsity != SparsityFlag::kDISABLE) { - config.setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); + config.setFlag(BuilderFlag::kSPARSE_WEIGHTS); if (build.sparsity == SparsityFlag::kFORCE) + { sparsify(network, sparseWeights); + } } config.setProfilingVerbosity(build.profilingVerbosity); - config.setMinTimingIterations(build.minTiming); config.setAvgTimingIterations(build.avgTiming); if (build.fp16) - config.setFlag(nvinfer1::BuilderFlag::kFP16); - + { + config.setFlag(BuilderFlag::kFP16); + } if (build.int8) - config.setFlag(nvinfer1::BuilderFlag::kINT8); + { + config.setFlag(BuilderFlag::kINT8); + } + if (build.bf16) + { + config.setFlag(BuilderFlag::kBF16); + } + + SMP_RETVAL_IF_FALSE(!(build.int8 && build.fp8), "FP8 and INT8 precisions have been specified", false, err); + + if (build.fp8) + { + config.setFlag(BuilderFlag::kFP8); + } + + if (build.int4) + { + config.setFlag(BuilderFlag::kINT4); + } if (build.int8 && !build.fp16) { @@ -925,18 +949,20 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, << std::endl; } - auto isInt8 = [](const IOFormat& format) { return format.first == nvinfer1::DataType::kINT8; }; + auto isInt8 = [](const IOFormat& format) { return format.first == DataType::kINT8; }; auto int8IO = std::count_if(build.inputFormats.begin(), build.inputFormats.end(), isInt8) + std::count_if(build.outputFormats.begin(), build.outputFormats.end(), isInt8); - auto hasQDQLayers = [](nvinfer1::INetworkDefinition& network) { + auto hasQDQLayers = [](INetworkDefinition& network) { // Determine if our network has QDQ layers. 
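// "QDQ" refers to explicit IQuantizeLayer/IDequantizeLayer nodes. A network that contains
// them is explicitly quantized and carries its own INT8 scales, so the random calibrator
// and the placeholder dynamic ranges used elsewhere in this file only matter for
// implicitly quantized networks.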
- const auto nbLayers = network.getNbLayers(); + auto const nbLayers = network.getNbLayers(); for (int32_t i = 0; i < nbLayers; i++) { - const auto& layer = network.getLayer(i); - if (layer->getType() == nvinfer1::LayerType::kQUANTIZE || layer->getType() == nvinfer1::LayerType::kDEQUANTIZE) + auto const& layer = network.getLayer(i); + if (layer->getType() == LayerType::kQUANTIZE || layer->getType() == LayerType::kDEQUANTIZE) + { return true; + } } return false; }; @@ -965,28 +991,37 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, return false; } } - nvinfer1::IOptimizationProfile* profileCalib{nullptr}; + IOptimizationProfile* profileCalib{nullptr}; if (!build.shapesCalib.empty()) { profileCalib = builder.createOptimizationProfile(); for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) { auto* input = network.getInput(i); - nvinfer1::Dims profileDims{}; - auto shape = build.shapesCalib.find(input->getName()); - ShapeRange shapesCalib{}; - shapesCalib = shape->second; + Dims profileDims{}; + auto const tensorName = input->getName(); + auto shape = findPlausible(build.shapesCalib, tensorName); - profileDims = toDims(shapesCalib[static_cast(nvinfer1::OptProfileSelector::kOPT)]); + if (shape == build.shapesCalib.end()) + { + std::ostringstream msg; + msg << "Calibration profile for tensor " << tensorName << " cannot be found!"; + throw std::invalid_argument(msg.str()); + } + + auto shapesCalib = shape->second; + profileDims = toDims(shapesCalib[static_cast(OptProfileSelector::kOPT)]); // Here we check only kMIN as all profileDims are the same. - SMP_RETVAL_IF_FALSE( - profileCalib->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, profileDims), + SMP_RETVAL_IF_FALSE(profileCalib->setDimensions(tensorName, OptProfileSelector::kMIN, profileDims), "Error in set dimensions to calibration profile OPT", false, err); - profileCalib->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, profileDims); - profileCalib->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, profileDims); + profileCalib->setDimensions(tensorName, OptProfileSelector::kOPT, profileDims); + profileCalib->setDimensions(tensorName, OptProfileSelector::kMAX, profileDims); + sample::gLogInfo << "Set calibration profile for input tensor " << tensorName << " to " << profileDims + << std::endl; } SMP_RETVAL_IF_FALSE(profileCalib->isValid(), "Calibration profile is invalid", false, err); - SMP_RETVAL_IF_FALSE(config.setCalibrationProfile(profileCalib), "Error in set calibration profile", false, err); + SMP_RETVAL_IF_FALSE( + config.setCalibrationProfile(profileCalib), "Error in set calibration profile", false, err); } std::vector elemCount{}; @@ -994,59 +1029,96 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, { auto* input = network.getInput(i); auto const dims = input->getDimensions(); - auto const isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }); + auto const isDynamicInput + = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }); if (profileCalib) - elemCount.push_back(volume(profileCalib->getDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT))); - else if (profile && isDynamicInput) - elemCount.push_back(volume(profile->getDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT))); + { + elemCount.push_back(volume(profileCalib->getDimensions(input->getName(), OptProfileSelector::kOPT))); + } + else if 
(!profiles.empty() && isDynamicInput) + { + elemCount.push_back( + volume(profiles[build.calibProfile]->getDimensions(input->getName(), OptProfileSelector::kOPT))); + } else + { elemCount.push_back(volume(input->getDimensions())); + } } - config.setInt8Calibrator(new RndInt8Calibrator(1, elemCount, build.calibration, network, err)); + calibrator.reset(new RndInt8Calibrator(1, elemCount, build.calibration, network, err)); + config.setInt8Calibrator(calibrator.get()); } if (build.directIO) - config.setFlag(nvinfer1::BuilderFlag::kDIRECT_IO); + { + config.setFlag(BuilderFlag::kDIRECT_IO); + } switch (build.precisionConstraints) { case PrecisionConstraints::kNONE: // It's the default for TensorRT. break; - case PrecisionConstraints::kOBEY: - config.setFlag(nvinfer1::BuilderFlag::kOBEY_PRECISION_CONSTRAINTS); - break; - case PrecisionConstraints::kPREFER: config.setFlag(nvinfer1::BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); break; + case PrecisionConstraints::kOBEY: config.setFlag(BuilderFlag::kOBEY_PRECISION_CONSTRAINTS); break; + case PrecisionConstraints::kPREFER: config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); break; } if (!build.layerPrecisions.empty() && build.precisionConstraints != PrecisionConstraints::kNONE) + { setLayerPrecisions(network, build.layerPrecisions); + } if (!build.layerOutputTypes.empty() && build.precisionConstraints != PrecisionConstraints::kNONE) + { setLayerOutputTypes(network, build.layerOutputTypes); + } + + if (!build.layerDeviceTypes.empty()) + { + setLayerDeviceTypes(network, config, build.layerDeviceTypes); + } - if (build.safe) - config.setEngineCapability(sys.DLACore != -1 ? nvinfer1::EngineCapability::kDLA_STANDALONE : nvinfer1::EngineCapability::kSAFETY); + if (!build.debugTensors.empty()) + { + markDebugTensors(network, build.debugTensors); + } + + if (build.safe && sys.DLACore == -1) + { + config.setEngineCapability(EngineCapability::kSAFETY); + } if (build.restricted) - config.setFlag(nvinfer1::BuilderFlag::kSAFETY_SCOPE); + { + config.setFlag(BuilderFlag::kSAFETY_SCOPE); + } if (sys.DLACore != -1) { if (sys.DLACore < builder.getNbDLACores()) { - config.setDefaultDeviceType(nvinfer1::DeviceType::kDLA); + config.setDefaultDeviceType(DeviceType::kDLA); config.setDLACore(sys.DLACore); - config.setFlag(nvinfer1::BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); - - if (sys.fallback) - config.setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK); - else // Reformatting runs on GPU, so avoid I/O reformatting - config.setFlag(nvinfer1::BuilderFlag::kDIRECT_IO); + config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); + if (build.buildDLAStandalone) + { + config.setEngineCapability(EngineCapability::kDLA_STANDALONE); + } + if (build.allowGPUFallback) + { + config.setFlag(BuilderFlag::kGPU_FALLBACK); + } + else + { + // Reformatting runs on GPU, so avoid I/O reformatting. 
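// With GPU fallback disabled the whole network must run on the DLA, so kDIRECT_IO keeps
// the engine's I/O tensors in DLA-native formats instead of inserting GPU-side
// reformatting layers at the network boundaries.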
+ config.setFlag(BuilderFlag::kDIRECT_IO); + } if (!build.int8) - config.setFlag(nvinfer1::BuilderFlag::kFP16); + { + config.setFlag(BuilderFlag::kFP16); + } } else { @@ -1057,37 +1129,50 @@ bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, if (build.enabledTactics || build.disabledTactics) { - nvinfer1::TacticSources tacticSources = config.getTacticSources(); + TacticSources tacticSources = config.getTacticSources(); tacticSources |= build.enabledTactics; tacticSources &= ~build.disabledTactics; config.setTacticSources(tacticSources); } + config.setHardwareCompatibilityLevel(build.hardwareCompatibilityLevel); + config.setRuntimePlatform(build.runtimePlatform); + + if (build.maxAuxStreams != defaultMaxAuxStreams) + { + config.setMaxAuxStreams(build.maxAuxStreams); + } + + if (build.allowWeightStreaming) + { + config.setFlag(BuilderFlag::kWEIGHT_STREAMING); + } + return true; } //! -//! \brief Create an engine for a network defintion +//! \brief Create a serialized engine for a network defintion //! -//! \return Pointer to the engine created or nullptr if the creation failed +//! \return Whether the engine creation succeeds or fails. //! -bool networkToEngine(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - BuildEnvironment& env, std::ostream& err) +bool networkToSerializedEngine( + BuildOptions const& build, SystemOptions const& sys, IBuilder& builder, BuildEnvironment& env, std::ostream& err) { - TrtUniquePtr config{builder.createBuilderConfig()}; - std::vector> sparseWeights; + std::unique_ptr config{builder.createBuilderConfig()}; + std::unique_ptr calibrator; + std::vector> sparseWeights; SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", false, err); - SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, *env.network, *config, err, sparseWeights), + SMP_RETVAL_IF_FALSE( + setupNetworkAndConfig(build, sys, builder, *env.network, *config, calibrator, err, sparseWeights), "Network And Config setup failed", false, err); - std::unique_ptr timingCache{nullptr}; + std::unique_ptr timingCache{}; // Try to load cache from file. Create a fresh cache if the file doesn't exist if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { - std::vector loadedCache = loadTimingCacheFile(build.timingCacheFile); - timingCache.reset(config->createTimingCache(static_cast(loadedCache.data()), loadedCache.size())); - SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed", false, err); - config->setTimingCache(*timingCache, false); + timingCache + = samplesCommon::buildTimingCacheFromFile(gLogger.getTRTLogger(), *config, build.timingCacheFile, err); } // CUDA stream used for profiling by the builder. @@ -1095,41 +1180,22 @@ bool networkToEngine(const BuildOptions& build, const SystemOptions& sys, nvinfe SMP_RETVAL_IF_FALSE(profileStream != nullptr, "Cuda stream creation failed", false, err); config->setProfileStream(*profileStream); - TrtUniquePtr serializedEngine{builder.buildSerializedNetwork(*env.network, *config)}; + auto const tBegin = std::chrono::high_resolution_clock::now(); + std::unique_ptr serializedEngine{builder.buildSerializedNetwork(*env.network, *config)}; SMP_RETVAL_IF_FALSE(serializedEngine != nullptr, "Engine could not be created from network", false, err); + auto const tEnd = std::chrono::high_resolution_clock::now(); + float const buildTime = std::chrono::duration(tEnd - tBegin).count(); + sample::gLogInfo << "Engine built in " << buildTime << " sec." 
<< std::endl; + sample::gLogInfo << "Created engine with size: " << (serializedEngine->size() / 1.0_MiB) << " MiB" << std::endl; - env.engineBlob.resize(serializedEngine->size()); - std::memcpy(env.engineBlob.data(), serializedEngine->data(), serializedEngine->size()); - - if (build.safe) - { - ASSERT(sample::hasSafeRuntime()); - std::unique_ptr safeRuntime{sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(safeRuntime != nullptr, "SafeRuntime creation failed", false, err); - safeRuntime->setErrorRecorder(&gRecorder); - env.safeEngine.reset(safeRuntime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size())); - if (build.consistency) - checkSafeEngine(serializedEngine->data(), serializedEngine->size()); + env.engine.setBlob(serializedEngine); - SMP_RETVAL_IF_FALSE(env.safeEngine != nullptr, "SafeEngine deserialization failed", false, err); - } - else + if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { - TrtUniquePtr runtime{nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(runtime != nullptr, "Runtime creation failed", false, err); - runtime->setErrorRecorder(&gRecorder); - env.engine.reset(runtime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size())); - SMP_RETVAL_IF_FALSE(env.engine != nullptr, "Engine deserialization failed", false, err); - if (build.timingCacheMode == TimingCacheMode::kGLOBAL) - { - auto const& timingCache = config->getTimingCache(); - std::unique_ptr timingCacheHostData{timingCache->serialize()}; - SMP_RETVAL_IF_FALSE(timingCacheHostData != nullptr, "Timing Cache serialization failed", false, err); - saveTimingCacheFile(build.timingCacheFile, timingCacheHostData.get()); - } - if (config->getInt8Calibrator()) - delete config->getInt8Calibrator(); + auto timingCache = config->getTimingCache(); + samplesCommon::updateTimingCacheFile(gLogger.getTRTLogger(), build.timingCacheFile, timingCache, builder); } + return true; } @@ -1137,24 +1203,67 @@ bool networkToEngine(const BuildOptions& build, const SystemOptions& sys, nvinfe //! \brief Parse a given model, create a network and an engine. //! bool modelToBuildEnv( - const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, BuildEnvironment& env, std::ostream& err) + ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err) { - TrtUniquePtr builder{nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", false, err); - builder->setErrorRecorder(&gRecorder); - auto networkFlags = (build.maxBatch) ? 0U : 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + env.builder.reset(createBuilder()); + SMP_RETVAL_IF_FALSE(env.builder != nullptr, "Builder creation failed", false, err); + env.builder->setErrorRecorder(&gRecorder); + auto networkFlags = (build.stronglyTyped) + ? 
1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED) + : 0U; +#if !TRT_WINML + for (auto const& pluginPath : sys.dynamicPlugins) + { + env.builder->getPluginRegistry().loadLibrary(pluginPath.c_str()); + } +#endif + env.network.reset(env.builder->createNetworkV2(networkFlags)); - env.network.reset(builder->createNetworkV2(networkFlags)); + std::vector vcPluginLibrariesUsed; SMP_RETVAL_IF_FALSE(env.network != nullptr, "Network creation failed", false, err); - env.parser = modelToNetwork(model, *env.network, err); + env.parser + = modelToNetwork(model, build, *env.network, err, build.versionCompatible ? &vcPluginLibrariesUsed : nullptr); SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false, err); - SMP_RETVAL_IF_FALSE(networkToEngine(build, sys, *builder, env, err), "Building engine failed", false, err); + +#if !TRT_WINML + if (build.versionCompatible && !sys.ignoreParsedPluginLibs && !vcPluginLibrariesUsed.empty()) + { + sample::gLogInfo << "The following plugin libraries were identified by the parser as required for a " + "version-compatible engine:" + << std::endl; + for (auto const& lib : vcPluginLibrariesUsed) + { + sample::gLogInfo << " " << lib << std::endl; + } + if (!build.excludeLeanRuntime) + { + sample::gLogInfo << "These libraries will be added to --setPluginsToSerialize since --excludeLeanRuntime " + "was not specified." + << std::endl; + std::copy(vcPluginLibrariesUsed.begin(), vcPluginLibrariesUsed.end(), + std::back_inserter(sys.setPluginsToSerialize)); + } + sample::gLogInfo << "These libraries will be added to --dynamicPlugins for use at inference time." << std::endl; + std::copy(vcPluginLibrariesUsed.begin(), vcPluginLibrariesUsed.end(), std::back_inserter(sys.dynamicPlugins)); + + // Implicitly-added plugins from ONNX parser should be loaded into plugin registry as well. + for (auto const& pluginPath : vcPluginLibrariesUsed) + { + env.builder->getPluginRegistry().loadLibrary(pluginPath.c_str()); + } + + sample::gLogInfo << "Use --ignoreParsedPluginLibs to disable this behavior." << std::endl; + } +#endif + + SMP_RETVAL_IF_FALSE( + networkToSerializedEngine(build, sys, *env.builder, env, err), "Building engine failed", false, err); return true; } namespace { -std::pair, std::vector> getLayerWeightsRolePair(nvinfer1::IRefitter& refitter) +std::pair, std::vector> getLayerWeightsRolePair(IRefitter& refitter) { // Get number of refittable items. auto const nbAll = refitter.getAll(0, nullptr, nullptr); @@ -1165,69 +1274,103 @@ std::pair, std::vector> getLayer std::vector layerNameStrs(nbAll); std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { if (name == nullptr) + { return std::string{}; - + } return std::string{name}; }); return {layerNameStrs, weightsRoles}; } -std::pair, std::vector> getMissingLayerWeightsRolePair(nvinfer1::IRefitter& refitter) +std::pair, std::vector> getMissingLayerWeightsRolePair(IRefitter& refitter) { // Get number of refittable items. auto const nbMissing = refitter.getMissing(0, nullptr, nullptr); - std::vector layerNames(nbMissing); + std::vector layerNames(nbMissing); // Allocate buffers for the items and get them. 
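// getMissing() follows the usual two-call pattern: the first call above (with null
// buffers) only returns the number of missing weights, and the second call below fills
// the layer-name and WeightsRole arrays.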
std::vector weightsRoles(nbMissing); refitter.getMissing(nbMissing, layerNames.data(), weightsRoles.data()); std::vector layerNameStrs(nbMissing); std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { if (name == nullptr) + { return std::string{}; + } return std::string{name}; }); return {layerNameStrs, weightsRoles}; } +} // namespace + +bool loadStreamingEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, std::ostream& err) +{ + auto& reader = env.engine.getFileReader(); + SMP_RETVAL_IF_FALSE(reader.open(filepath), "", false, err << "Error opening engine file: " << filepath); + return true; +} -bool loadEngineToEnv(const std::string& engine, int DLACore, bool safe, bool enableConsistency, BuildEnvironment& env, std::ostream& err) +bool loadEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, std::ostream& err) { - std::ifstream engineFile(engine, std::ios::binary); - SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error opening engine file: " << engine); + auto const tBegin = std::chrono::high_resolution_clock::now(); + std::ifstream engineFile(filepath, std::ios::binary); + SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error opening engine file: " << filepath); engineFile.seekg(0, std::ifstream::end); int64_t fsize = engineFile.tellg(); engineFile.seekg(0, std::ifstream::beg); - env.engineBlob.resize(fsize); - engineFile.read(reinterpret_cast(env.engineBlob.data()), fsize); - SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error loading engine file: " << engine); + std::vector engineBlob(fsize); + engineFile.read(reinterpret_cast(engineBlob.data()), fsize); + SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error loading engine file: " << filepath); + auto const tEnd = std::chrono::high_resolution_clock::now(); + float const loadTime = std::chrono::duration(tEnd - tBegin).count(); + sample::gLogInfo << "Engine loaded in " << loadTime << " sec." << std::endl; + sample::gLogInfo << "Loaded engine with size: " << (fsize / 1.0_MiB) << " MiB" << std::endl; + + env.engine.setBlob(std::move(engineBlob)); + + return true; +} + +bool printPlanVersion(BuildEnvironment& env, std::ostream& err) +{ + constexpr int64_t kPLAN_SIZE{28}; + std::vector data(kPLAN_SIZE); + auto blob = data.data(); - if (safe) + auto& reader = env.engine.getFileReader(); + if (reader.isOpen()) { - ASSERT(sample::hasSafeRuntime()); - std::unique_ptr safeRuntime{sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - safeRuntime->setErrorRecorder(&gRecorder); - env.safeEngine.reset(safeRuntime->deserializeCudaEngine(env.engineBlob.data(), fsize)); - bool result = env.safeEngine != nullptr; - if (result && enableConsistency) - { - checkSafeEngine(env.engineBlob.data(), fsize); - } - return result; + SMP_RETVAL_IF_FALSE(reader.read(data.data(), kPLAN_SIZE) == kPLAN_SIZE, "Failed to read plan file", false, err); } + else + { + SMP_RETVAL_IF_FALSE(env.engine.getBlob().data != nullptr, "Plan file is empty", false, err); + SMP_RETVAL_IF_FALSE(env.engine.getBlob().size >= 28, "Plan file is incorrect", false, err); + blob = static_cast(env.engine.getBlob().data); + } + auto blob32 = reinterpret_cast(blob); - TrtUniquePtr runtime{nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())}; - if (DLACore != -1) - runtime->setDLACore(DLACore); - - runtime->setErrorRecorder(&gRecorder); - env.engine.reset(runtime->deserializeCudaEngine(env.engineBlob.data(), fsize)); - return env.engine != nullptr; + //! 
Correct TensorRT plan file starts with this tag + constexpr uint32_t kPLAN_FILE_TAG{0x74727466U}; + SMP_RETVAL_IF_FALSE(blob32[0] == kPLAN_FILE_TAG, "Failed to verify a plan tag.", false, err); + switch (blob32[1]) + { + case 0U: + { + // Blob index to store the plan version may depend on the serialization version. + sample::gLogInfo << "Plan was created with TensorRT version " << static_cast(blob[24]) + << "." << static_cast(blob[25]) << "." << static_cast(blob[26]) + << "." << static_cast(blob[27]) << std::endl; + return true; + } + } + sample::gLogError << "Serialization version is not supported." << std::endl; + return false; } -} // namespace void dumpRefittable(nvinfer1::ICudaEngine& engine) { - TrtUniquePtr refitter{nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger())}; + std::unique_ptr refitter{createRefitter(engine)}; if (refitter == nullptr) { sample::gLogError << "Failed to create a refitter." << std::endl; @@ -1244,13 +1387,13 @@ void dumpRefittable(nvinfer1::ICudaEngine& engine) } } -nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore, std::ostream& err) +ICudaEngine* loadEngine(std::string const& engine, int32_t DLACore, std::ostream& err) { - BuildEnvironment env; - return loadEngineToEnv(engine, DLACore, false, false, env, err) ? env.engine.release() : nullptr; + BuildEnvironment env(/* isSafe */ false, /* versionCompatible */ false, DLACore, "", getTempfileControlDefaults()); + return loadEngineToBuildEnv(engine, env, err) ? env.engine.release() : nullptr; } -bool saveEngine(const nvinfer1::ICudaEngine& engine, const std::string& fileName, std::ostream& err) +bool saveEngine(const ICudaEngine& engine, std::string const& fileName, std::ostream& err) { std::ofstream engineFile(fileName, std::ios::binary); if (!engineFile) @@ -1259,7 +1402,7 @@ bool saveEngine(const nvinfer1::ICudaEngine& engine, const std::string& fileName return false; } - TrtUniquePtr serializedEngine{engine.serialize()}; + std::unique_ptr serializedEngine{engine.serialize()}; if (serializedEngine == nullptr) { err << "Engine serialization failed" << std::endl; @@ -1270,153 +1413,151 @@ bool saveEngine(const nvinfer1::ICudaEngine& engine, const std::string& fileName return !engineFile.fail(); } -bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, - BuildEnvironment& env, std::ostream& err) +bool getEngineBuildEnv( + const ModelOptions& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err) { - TrtUniquePtr engine; - TrtUniquePtr network; - Parser parser; - - bool createEngineSuccess {false}; + bool createEngineSuccess{false}; if (build.load) - createEngineSuccess = loadEngineToEnv(build.engine, sys.DLACore, build.safe, build.consistency, env, err); + { + if (build.safe) + { + createEngineSuccess = loadEngineToBuildEnv(build.engine, env, err); + } + else + { + createEngineSuccess = loadStreamingEngineToBuildEnv(build.engine, env, err); + } + } else + { createEngineSuccess = modelToBuildEnv(model, build, sys, env, err); + } - SMP_RETVAL_IF_FALSE(createEngineSuccess, "Failed to create engine from model.", false, err); + SMP_RETVAL_IF_FALSE(createEngineSuccess, "Failed to create engine from model or file.", false, err); + + if (build.getPlanVersionOnly && build.load) + { + SMP_RETVAL_IF_FALSE(printPlanVersion(env, err), "Failed to get plan file version.", false, err); + return true; + } if (build.save) { std::ofstream engineFile(build.engine, std::ios::binary); - 
engineFile.write(reinterpret_cast(env.engineBlob.data()), env.engineBlob.size()); + auto& engineBlob = env.engine.getBlob(); + engineFile.write(static_cast(engineBlob.data), engineBlob.size); SMP_RETVAL_IF_FALSE(!engineFile.fail(), "Saving engine to file failed.", false, err); + engineFile.flush(); + engineFile.close(); + if (!build.safe) + { + env.engine.releaseBlob(); + SMP_RETVAL_IF_FALSE(loadStreamingEngineToBuildEnv(build.engine, env, err), "Reading engine file failed.", false, err); + } } - return true; -} -nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - nvinfer1::INetworkDefinition& network, std::ostream& err) -{ - TrtUniquePtr config{builder.createBuilderConfig()}; - std::vector> sparseWeights; - SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", nullptr, err); - SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, network, *config, err, sparseWeights), - "Network And Config setup failed", nullptr, err); - return builder.buildSerializedNetwork(network, *config); -} - -nvinfer1::IHostMemory* modelToSerialized( - const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err) -{ - TrtUniquePtr builder{nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", nullptr, err); - builder->setErrorRecorder(&gRecorder); - - auto networkFlags - = (build.maxBatch) ? 0U : 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - - TrtUniquePtr network{builder->createNetworkV2(networkFlags)}; - SMP_RETVAL_IF_FALSE(network != nullptr, "Network creation failed", nullptr, err); - - Parser parser = modelToNetwork(model, *network, err); - SMP_RETVAL_IF_FALSE(parser.operator bool(), "Parsing model failed", nullptr, err); - - return networkToSerialized(build, sys, *builder, *network, err); -} - -bool serializeAndSave(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err) -{ - TrtUniquePtr serialized{modelToSerialized(model, build, sys, err)}; - SMP_RETVAL_IF_FALSE(serialized != nullptr, "Network serialization failed", false, err); - - std::ofstream engineFile(build.engine, std::ios::binary); - SMP_RETVAL_IF_FALSE(!!engineFile, "Cannot open a file to save a serialize network", false, err); - engineFile.write(static_cast(serialized->data()), serialized->size()); - return !engineFile.fail(); + return true; } // There is not a getWeightsName API, so we need to use WeightsRole. 
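// Refittable weights are addressed by (layer name, WeightsRole) pairs rather than by a
// weights name. A minimal sketch of how the pairs returned below are consumed, mirroring
// timeRefit() further down (the `engine` and `layer` objects are placeholders):
//
//   std::unique_ptr<nvinfer1::IRefitter> refitter{createRefitter(engine)};
//   for (auto const& roleWeights : getAllRefitWeightsForLayer(layer))
//   {
//       // setWeights() returns false if the (name, role) pair is not refittable.
//       refitter->setWeights(layer.getName(), roleWeights.first, roleWeights.second);
//   }
//   refitter->refitCudaEngine();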
-std::vector> getAllRefitWeightsForLayer(const nvinfer1::ILayer& l) +std::vector> getAllRefitWeightsForLayer(const ILayer& l) { switch (l.getType()) { - case nvinfer1::LayerType::kCONSTANT: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kCONSTANT, layer.getWeights())}; - } - case nvinfer1::LayerType::kCONVOLUTION: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(nvinfer1::WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case nvinfer1::LayerType::kDECONVOLUTION: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(nvinfer1::WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case nvinfer1::LayerType::kFULLY_CONNECTED: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(nvinfer1::WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case nvinfer1::LayerType::kSCALE: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kSCALE, layer.getScale()), - std::make_pair(nvinfer1::WeightsRole::kSHIFT, layer.getShift())}; - } - case nvinfer1::LayerType::kRNN_V2: - case nvinfer1::LayerType::kACTIVATION: - case nvinfer1::LayerType::kPOOLING: - case nvinfer1::LayerType::kLRN: - case nvinfer1::LayerType::kSOFTMAX: - case nvinfer1::LayerType::kSHUFFLE: - case nvinfer1::LayerType::kCONCATENATION: - case nvinfer1::LayerType::kELEMENTWISE: - case nvinfer1::LayerType::kPLUGIN: - case nvinfer1::LayerType::kUNARY: - case nvinfer1::LayerType::kPADDING: - case nvinfer1::LayerType::kREDUCE: - case nvinfer1::LayerType::kTOPK: - case nvinfer1::LayerType::kGATHER: - case nvinfer1::LayerType::kMATRIX_MULTIPLY: - case nvinfer1::LayerType::kRAGGED_SOFTMAX: - case nvinfer1::LayerType::kIDENTITY: - case nvinfer1::LayerType::kPLUGIN_V2: - case nvinfer1::LayerType::kSLICE: - case nvinfer1::LayerType::kFILL: - case nvinfer1::LayerType::kSHAPE: - case nvinfer1::LayerType::kPARAMETRIC_RELU: - case nvinfer1::LayerType::kRESIZE: - case nvinfer1::LayerType::kTRIP_LIMIT: - case nvinfer1::LayerType::kRECURRENCE: - case nvinfer1::LayerType::kITERATOR: - case nvinfer1::LayerType::kLOOP_OUTPUT: - case nvinfer1::LayerType::kSELECT: - case nvinfer1::LayerType::kQUANTIZE: - case nvinfer1::LayerType::kDEQUANTIZE: - case nvinfer1::LayerType::kCONDITION: - case nvinfer1::LayerType::kCONDITIONAL_INPUT: - case nvinfer1::LayerType::kCONDITIONAL_OUTPUT: - case nvinfer1::LayerType::kSCATTER: - case nvinfer1::LayerType::kEINSUM: - case nvinfer1::LayerType::kASSERTION: return {}; + case LayerType::kCONSTANT: + { + auto const& layer = static_cast(l); + auto const weights = layer.getWeights(); + switch (weights.type) + { + case DataType::kFLOAT: + case DataType::kHALF: + case DataType::kBF16: + case DataType::kINT8: + case DataType::kINT32: + case DataType::kINT64: return {std::make_pair(WeightsRole::kCONSTANT, weights)}; + case DataType::kBOOL: + case DataType::kUINT8: + case DataType::kFP8: + case DataType::kINT4: + // Refit not supported for these types. 
+ break; + } + break; + } + case LayerType::kCONVOLUTION: + { + auto const& layer = static_cast(l); + return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), + std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; + } + case LayerType::kDECONVOLUTION: + { + auto const& layer = static_cast(l); + return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), + std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; + } + case LayerType::kSCALE: + { + auto const& layer = static_cast(l); + return {std::make_pair(WeightsRole::kSCALE, layer.getScale()), + std::make_pair(WeightsRole::kSHIFT, layer.getShift())}; + } + case LayerType::kACTIVATION: + case LayerType::kASSERTION: + case LayerType::kCAST: + case LayerType::kCONCATENATION: + case LayerType::kCONDITION: + case LayerType::kCONDITIONAL_INPUT: + case LayerType::kCONDITIONAL_OUTPUT: + case LayerType::kDEQUANTIZE: + case LayerType::kEINSUM: + case LayerType::kELEMENTWISE: + case LayerType::kFILL: + case LayerType::kGATHER: + case LayerType::kGRID_SAMPLE: + case LayerType::kIDENTITY: + case LayerType::kITERATOR: + case LayerType::kLOOP_OUTPUT: + case LayerType::kLRN: + case LayerType::kMATRIX_MULTIPLY: + case LayerType::kNMS: + case LayerType::kNON_ZERO: + case LayerType::kNORMALIZATION: + case LayerType::kONE_HOT: + case LayerType::kPADDING: + case LayerType::kPARAMETRIC_RELU: + case LayerType::kPLUGIN: + case LayerType::kPLUGIN_V2: + case LayerType::kPLUGIN_V3: + case LayerType::kPOOLING: + case LayerType::kQUANTIZE: + case LayerType::kRAGGED_SOFTMAX: + case LayerType::kRECURRENCE: + case LayerType::kREDUCE: + case LayerType::kRESIZE: + case LayerType::kREVERSE_SEQUENCE: + case LayerType::kSCATTER: + case LayerType::kSELECT: + case LayerType::kSHAPE: + case LayerType::kSHUFFLE: + case LayerType::kSLICE: + case LayerType::kSOFTMAX: + case LayerType::kTOPK: + case LayerType::kTRIP_LIMIT: + case LayerType::kUNARY: return {}; } return {}; } -bool timeRefit(nvinfer1::INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, bool multiThreading) +bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, bool multiThreading) { using time_point = std::chrono::time_point; using durationMs = std::chrono::duration; auto const nbLayers = network.getNbLayers(); - TrtUniquePtr refitter{nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger())}; + std::unique_ptr refitter{createRefitter(engine)}; // Set max threads that can be used by refitter. if (multiThreading && !refitter->setMaxThreads(10)) { @@ -1424,17 +1565,17 @@ bool timeRefit(nvinfer1::INetworkDefinition const& network, nvinfer1::ICudaEngin return false; } auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); - // We use std::string instead of const char* since we can have copies of layer names. - std::set> layerRoleSet; + // We use std::string instead of char const* since we can have copies of layer names. 
+ std::set> layerRoleSet; auto const& layerNames = layerWeightsRolePair.first; auto const& weightsRoles = layerWeightsRolePair.second; std::transform(layerNames.begin(), layerNames.end(), weightsRoles.begin(), std::inserter(layerRoleSet, layerRoleSet.begin()), - [](std::string const& layerName, nvinfer1::WeightsRole const role) { return std::make_pair(layerName, role); }); + [](std::string const& layerName, WeightsRole const role) { return std::make_pair(layerName, role); }); - auto const isRefittable = [&layerRoleSet](char const* layerName, nvinfer1::WeightsRole const role) { + auto const isRefittable = [&layerRoleSet](char const* layerName, WeightsRole const role) { return layerRoleSet.find(std::make_pair(layerName, role)) != layerRoleSet.end(); }; @@ -1449,7 +1590,9 @@ bool timeRefit(nvinfer1::INetworkDefinition const& network, nvinfer1::ICudaEngin { bool const success = refitter->setWeights(layer->getName(), roleWeights.first, roleWeights.second); if (!success) + { return false; + } } } } @@ -1468,29 +1611,35 @@ bool timeRefit(nvinfer1::INetworkDefinition const& network, nvinfer1::ICudaEngin return layerNames.empty(); }; + // Skip weights validation since we are confident that the new weights are similar to the weights used to build + // engine. + refitter->setWeightsValidation(false); + // Warm up and report missing weights + // We only need to set weights for the first time and that can be reused in later refitting process. bool const success = setWeights() && reportMissingWeights() && refitter->refitCudaEngine(); if (!success) { return false; } - constexpr int32_t loop = 10; + TrtCudaStream stream; + constexpr int32_t kLOOP = 10; time_point const refitStartTime{std::chrono::steady_clock::now()}; { - for (int32_t l = 0; l < loop; l++) + for (int32_t l = 0; l < kLOOP; l++) { - bool const success = setWeights() && refitter->refitCudaEngine(); - if (!success) + if (!refitter->refitCudaEngineAsync(stream.get())) { return false; } } } + stream.synchronize(); time_point const refitEndTime{std::chrono::steady_clock::now()}; sample::gLogInfo << "Engine refitted" - << " in " << durationMs(refitEndTime - refitStartTime).count() / loop << " ms." << std::endl; + << " in " << durationMs(refitEndTime - refitStartTime).count() / kLOOP << " ms." << std::endl; return true; } @@ -1499,28 +1648,20 @@ namespace void* initSafeRuntime() { void* handle{nullptr}; + // libsafe_executor.so will be renamed to libnvinfer_safe.so when TRTS-9421 completes. + // Currently libsafe_executor_debug.so for samplesCommon::isDebug() is not ready. +#define TRTS_9421_COMPLETED 0 +#if TRTS_9421_COMPLETED #if !defined(_WIN32) - std::string const dllName{samplesCommon::isDebug() ? "libnvinfer_safe_debug.so.8" : "libnvinfer_safe.so.8"}; + std::string const dllName{"libsafe_executor.so"}; #if SANITIZER_BUILD handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); #else - handle = dlopen(dllName.c_str(), RTLD_LAZY); -#endif -#endif - return handle; -} - -void* initConsistencyCheckerLibrary() -{ - void* handle{nullptr}; -#if !defined(_WIN32) - std::string const dllName{samplesCommon::isDebug() ? 
"libnvinfer_checker_debug.so.8" : "libnvinfer_checker.so.8"}; -#if SANITIZER_BUILD - handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); -#else - handle = dlopen(dllName.c_str(), RTLD_LAZY); + // RTLD_GLOBAL is used for symbol resolution of subsequently loaded plugin libraries + handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_GLOBAL); #endif #endif +#endif // TRTS_9421_COMPLETED return handle; } @@ -1536,7 +1677,6 @@ struct DllDeleter } }; const std::unique_ptr safeRuntimeLibrary{initSafeRuntime()}; -const std::unique_ptr consistencyCheckerLibrary{initConsistencyCheckerLibrary()}; #endif } // namespace @@ -1549,81 +1689,4 @@ bool hasSafeRuntime() return ret; } -nvinfer1::safe::IRuntime* createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept -{ - nvinfer1::safe::IRuntime* runtime{nullptr}; -#if !defined(_WIN32) - constexpr char symbolName[] = "_ZN8nvinfer14safe18createInferRuntimeERNS_7ILoggerE"; - typedef nvinfer1::safe::IRuntime* (*CreateInferRuntimeFn)(nvinfer1::ILogger & logger); - if (hasSafeRuntime()) - { - auto createFn = reinterpret_cast(dlsym(safeRuntimeLibrary.get(), symbolName)); - if (createFn != nullptr) - { - runtime = createFn(logger); - } - } -#endif - return runtime; -} - -bool hasConsistencyChecker() -{ - bool ret{false}; -#if !defined(_WIN32) - ret = (consistencyCheckerLibrary != nullptr); -#endif - return ret; -} - -nvinfer1::consistency::IConsistencyChecker* createConsistencyChecker( - nvinfer1::ILogger& logger, void const* serializedEngine, int32_t const engineSize) noexcept -{ - nvinfer1::consistency::IConsistencyChecker* checker{nullptr}; - - if (serializedEngine == nullptr || engineSize == 0) - { - return checker; - } - -#if !defined(_WIN32) - constexpr char symbolName[] = "createConsistencyChecker_INTERNAL"; - typedef nvinfer1::consistency::IConsistencyChecker* (*CreateCheckerFn)( - nvinfer1::ILogger * logger, void const* data, size_t size, uint32_t version); - if (hasSafeRuntime()) - { - auto createFn = reinterpret_cast(dlsym(consistencyCheckerLibrary.get(), symbolName)); - if (createFn != nullptr) - { - checker = createFn(&logger, serializedEngine, engineSize, NV_TENSORRT_VERSION); - } - } -#endif - return checker; -} - -bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize) -{ - - if (!hasConsistencyChecker()) - { - sample::gLogError << "Cannot perform consistency check because the checker is not loaded.." << std::endl; - return false; - } - auto checker = std::unique_ptr( - createConsistencyChecker(sample::gLogger.getTRTLogger(), serializedEngine, engineSize)); - if (checker.get() == nullptr) - { - sample::gLogError << "Failed to create consistency checker." << std::endl; - return false; - } - sample::gLogInfo << "Start consistency checking." << std::endl; - if (!checker->validate()) - { - sample::gLogError << "Consistency validation failed." << std::endl; - return false; - } - sample::gLogInfo << "Consistency validation passed." << std::endl; - return true; -} } // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/sampleEngines.h b/src/Detector/tensorrt_yolo/common/sampleEngines.h index 620b51a1..ec02e909 100644 --- a/src/Detector/tensorrt_yolo/common/sampleEngines.h +++ b/src/Detector/tensorrt_yolo/common/sampleEngines.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,58 +18,227 @@ #ifndef TRT_SAMPLE_ENGINES_H #define TRT_SAMPLE_ENGINES_H -#include -#include - #include "NvInfer.h" - -#if (NV_TENSORRT_MAJOR > 7) - -#include "NvInferConsistency.h" -#include "NvInferSafeRuntime.h" - -#endif - #include "NvOnnxParser.h" #include "sampleOptions.h" #include "sampleUtils.h" +#include "streamReader.h" +#include +#include namespace sample { struct Parser { - TrtUniquePtr onnxParser; + std::unique_ptr onnxParser; operator bool() const { - return onnxParser.operator bool(); + return onnxParser != nullptr; } }; -struct BuildEnvironment +//! +//! \brief Helper struct to faciliate engine serialization and deserialization. It does not own the underlying memory. +//! +struct EngineBlob { - TrtUniquePtr network; - //! Parser that creates the network. Must be declared *after* network, so that when - //! ~BuildEnvironment() executes, the parser is destroyed before the network is destroyed. - Parser parser; - TrtUniquePtr engine; - std::unique_ptr safeEngine; - std::vector engineBlob; + EngineBlob(void* engineData, size_t engineSize) + : data(engineData) + , size(engineSize) + { + } + void* data{}; + size_t size{}; + bool empty() const + { + return size == 0; + } }; //! -//! \brief Generate a network definition for a given model -//! -//! \return Parser The parser used to initialize the network and that holds the weights for the network, or an invalid -//! parser (the returned parser converts to false if tested) +//! \brief A helper class to hold a serialized engine (std or safe) and only deserialize it when being accessed. //! -//! Constant input dimensions in the model must not be changed in the corresponding -//! network definition, because its correctness may rely on the constants. -//! -//! \see Parser::operator bool() -//! -Parser modelToNetwork(const ModelOptions& model, nvinfer1::INetworkDefinition& network, std::ostream& err); +class LazilyDeserializedEngine +{ +public: + //! + //! \brief Delete default constructor to make sure isSafe and DLACore are always set. + //! + LazilyDeserializedEngine() = delete; + + //! + //! \brief Constructor of LazilyDeserializedEngine. + //! + LazilyDeserializedEngine(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir, + nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath) + : mIsSafe(isSafe) + , mVersionCompatible(versionCompatible) + , mDLACore(DLACore) + , mTempdir(tempdir) + , mTempfileControls(tempfileControls) + , mLeanDLLPath(leanDLLPath) + { + mFileReader = std::make_unique(); + } + + //! + //! \brief Move from another LazilyDeserializedEngine. + //! + LazilyDeserializedEngine(LazilyDeserializedEngine&& other) = default; + + //! + //! \brief Delete copy constructor. + //! + LazilyDeserializedEngine(LazilyDeserializedEngine const& other) = delete; + + //! + //! \brief Get the pointer to the ICudaEngine. Triggers deserialization if not already done so. + //! + nvinfer1::ICudaEngine* get(); + + //! + //! \brief Get the pointer to the ICudaEngine and release the ownership. + //! + nvinfer1::ICudaEngine* release(); + + //! 
+ //! \brief Get the underlying blob storing serialized engine. + //! + EngineBlob const getBlob() const + { + ASSERT((!mFileReader || !mFileReader->isOpen()) + && "Attempting to access the glob when there is an open file reader!"); + if (!mEngineBlob.empty()) + { + return EngineBlob{const_cast(static_cast(mEngineBlob.data())), mEngineBlob.size()}; + } + if (mEngineBlobHostMemory.get() != nullptr && mEngineBlobHostMemory->size() > 0) + { + return EngineBlob{mEngineBlobHostMemory->data(), mEngineBlobHostMemory->size()}; + } + ASSERT(false && "Attempting to access an empty engine!"); + return EngineBlob{nullptr, 0}; + } + + //! + //! \brief Set the underlying blob storing the serialized engine without duplicating IHostMemory. + //! + void setBlob(std::unique_ptr& data) + { + ASSERT(data.get() && data->size() > 0); + mEngineBlobHostMemory = std::move(data); + mEngine.reset(); + } + + //! + //! \brief Set the underlying blob storing the serialized engine without duplicating vector memory. + //! + void setBlob(std::vector&& engineBlob) + { + mEngineBlob = std::move(engineBlob); + mEngine.reset(); + } + + //! + //! \brief Release the underlying blob without deleting the deserialized engine. + //! + void releaseBlob() + { + mEngineBlob.clear(); + mEngineBlobHostMemory.reset(); + } + + //! + //! \brief Get the file stream reader used for deserialization + //! + samplesCommon::FileStreamReader& getFileReader() + { + ASSERT(mFileReader); + return *mFileReader; + } + + //! + //! \brief Get if safe mode is enabled. + //! + bool isSafe() + { + return mIsSafe; + } + + void setDynamicPlugins(std::vector const& dynamicPlugins) + { + mDynamicPlugins = dynamicPlugins; + } + +private: + bool mIsSafe{false}; + bool mVersionCompatible{false}; + int32_t mDLACore{-1}; + std::vector mEngineBlob; + std::unique_ptr mFileReader; + + // Directly use the host memory of a serialized engine instead of duplicating the engine in CPU memory. + std::unique_ptr mEngineBlobHostMemory; + + std::string mTempdir{}; + nvinfer1::TempfileControlFlags mTempfileControls{getTempfileControlDefaults()}; + std::string mLeanDLLPath{}; + std::vector mDynamicPlugins; + + //! \name Owned TensorRT objects + //! Per TensorRT object lifetime requirements as outlined in the developer guide, + //! the runtime must remain live while any engines created by the runtime are live. + //! DO NOT ADJUST the declaration order here: runtime -> (engine). + //! Destruction occurs in reverse declaration order: (engine) -> runtime. + //!@{ + + //! The runtime used to track parent of mRuntime if one exists. + //! Needed to load mRuntime if lean.so is supplied through file system path. + std::unique_ptr mParentRuntime{}; + + //! The runtime that is used to deserialize the engine. + std::unique_ptr mRuntime{}; + + //! If mIsSafe is false, this points to the deserialized std engine + std::unique_ptr mEngine{}; + + //!@} +}; + +struct BuildEnvironment +{ + BuildEnvironment() = delete; + BuildEnvironment(BuildEnvironment const& other) = delete; + BuildEnvironment(BuildEnvironment&& other) = delete; + BuildEnvironment(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir, + nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath = "") + : engine(isSafe, versionCompatible, DLACore, tempdir, tempfileControls, leanDLLPath) + { + } + + //! \name Owned TensorRT objects + //! Per TensorRT object lifetime requirements as outlined in the developer guide, + //! 
factory objects must remain live while the objects created by those factories + //! are live (with the exception of builder -> engine). + //! DO NOT ADJUST the declaration order here: builder -> network -> parser. + //! Destruction occurs in reverse declaration order: parser -> network -> builder. + //!@{ + + //! The builder used to build the engine. + std::unique_ptr builder; + + //! The network used by the builder. + std::unique_ptr network; + + //! The parser used to specify the network. + Parser parser; + + //! The engine. + LazilyDeserializedEngine engine; + //!@} +}; //! //! \brief Set up network and config @@ -89,95 +259,63 @@ void dumpRefittable(nvinfer1::ICudaEngine& engine); //! //! \return Pointer to the engine loaded or nullptr if the operation failed //! -nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore, std::ostream& err); +nvinfer1::ICudaEngine* loadEngine(std::string const& engine, int32_t DLACore, std::ostream& err); //! //! \brief Save an engine into a file //! //! \return boolean Return true if the engine was successfully saved //! -bool saveEngine(const nvinfer1::ICudaEngine& engine, const std::string& fileName, std::ostream& err); +bool saveEngine(nvinfer1::ICudaEngine const& engine, std::string const& fileName, std::ostream& err); //! //! \brief Create an engine from model or serialized file, and optionally save engine //! //! \return Pointer to the engine created or nullptr if the creation failed //! -bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, - BuildEnvironment& env, std::ostream& err); - -//! -//! \brief Create an engine from model or serialized file, and optionally save engine -//! -//! \return Pointer to the engine created or nullptr if the creation failed -//! -inline TrtUniquePtr getEngine( - const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err) -{ - BuildEnvironment env; - TrtUniquePtr engine; - if (getEngineBuildEnv(model, build, sys, env, err)) - { - engine.swap(env.engine); - } - return engine; -} +bool getEngineBuildEnv( + ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err); //! //! \brief Create a serialized network //! //! \return Pointer to a host memory for a serialized network //! -nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - nvinfer1::INetworkDefinition& network, std::ostream& err); +nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys, + nvinfer1::IBuilder& builder, nvinfer1::INetworkDefinition& network, std::ostream& err); //! //! \brief Tranfer model to a serialized network //! //! \return Pointer to a host memory for a serialized network //! -nvinfer1::IHostMemory* modelToSerialized(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); +nvinfer1::IHostMemory* modelToSerialized( + const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); //! //! \brief Serialize network and save it into a file //! //! \return boolean Return true if the network was successfully serialized and saved //! 
-bool serializeAndSave(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); +bool serializeAndSave( + const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); bool timeRefit(const nvinfer1::INetworkDefinition& network, nvinfer1::ICudaEngine& engine, bool multiThreading); //! //! \brief Set tensor scales from a calibration table //! -void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, const std::vector& inputFormats, - const std::vector& outputFormats, const std::string& calibrationFile); +void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector const& inputFormats, + std::vector const& outputFormats, std::string const& calibrationFile); //! //! \brief Check if safe runtime is loaded. //! bool hasSafeRuntime(); -//! -//! \brief Create a safe runtime object if the dynamic library is loaded. -//! -nvinfer1::safe::IRuntime* createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept; - -//! -//! \brief Check if consistency checker is loaded. -//! -bool hasConsistencyChecker(); +bool loadStreamingEngineToBuildEnv(std::string const& engine, BuildEnvironment& env, std::ostream& err); -//! -//! \brief Create a consistency checker object if the dynamic library is loaded. -//! -nvinfer1::consistency::IConsistencyChecker* createConsistencyChecker( - nvinfer1::ILogger& logger, nvinfer1::IHostMemory const* engine) noexcept; - -//! -//! \brief Run consistency check on serialized engine. -//! -bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize); +bool loadEngineToBuildEnv(std::string const& engine, BuildEnvironment& env, std::ostream& err); } // namespace sample #endif // TRT_SAMPLE_ENGINES_H diff --git a/src/Detector/tensorrt_yolo/common/sampleEntrypoints.h b/src/Detector/tensorrt_yolo/common/sampleEntrypoints.h new file mode 100644 index 00000000..cc8bf1b9 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/sampleEntrypoints.h @@ -0,0 +1,101 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_SAMPLE_ENTRYPOINTS_H +#define TRT_SAMPLE_ENTRYPOINTS_H + +//! \file sampleEntrypoints.h +//! +//! Declares and conditionally defines entrypoints needed to create base TensorRT objects, depending +//! on whether the given sample uses TRT at link time or dynamically. Since common code is built once +//! and shared across all samples (both link-time and dynamic TRT), it does not define these entrypoints, +//! so each sample must define them individually. +//! +//! Samples that use TRT at link time can define DEFINE_TRT_ENTRYPOINTS before including this header to +//! pick up the definitions here. 
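// A minimal usage sketch for the opt-in described above, assuming a sample that links
// against TensorRT directly (the entrypoints should be defined in exactly one
// translation unit to avoid duplicate definitions):
//
//   #define DEFINE_TRT_ENTRYPOINTS 1
//   #include "sampleEntrypoints.h"
//
//   // Shared sample code then creates TensorRT objects only through these factories:
//   nvinfer1::IBuilder* builder = createBuilder();
//   nvinfer1::IRuntime* runtime = createRuntime();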
+ +#include "NvInfer.h" +#include "NvOnnxParser.h" +#include "logger.h" + +extern nvinfer1::IBuilder* createBuilder(); +extern nvinfer1::IRuntime* createRuntime(); +extern nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine); + +extern nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network); + +#if !defined(DEFINE_TRT_ENTRYPOINTS) +#define DEFINE_TRT_ENTRYPOINTS 0 +#endif + +// Allow opting out of individual entrypoints that are unused by the sample +#if !defined(DEFINE_TRT_BUILDER_ENTRYPOINT) +#define DEFINE_TRT_BUILDER_ENTRYPOINT 1 +#endif +#if !defined(DEFINE_TRT_RUNTIME_ENTRYPOINT) +#define DEFINE_TRT_RUNTIME_ENTRYPOINT 1 +#endif +#if !defined(DEFINE_TRT_REFITTER_ENTRYPOINT) +#define DEFINE_TRT_REFITTER_ENTRYPOINT 1 +#endif +#if !defined(DEFINE_TRT_ONNX_PARSER_ENTRYPOINT) +#define DEFINE_TRT_ONNX_PARSER_ENTRYPOINT 1 +#endif +#if !defined(DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT) +#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 1 +#endif + +#if DEFINE_TRT_ENTRYPOINTS +nvinfer1::IBuilder* createBuilder() +{ +#if DEFINE_TRT_BUILDER_ENTRYPOINT + return nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()); +#else + return {}; +#endif +} + +nvinfer1::IRuntime* createRuntime() +{ +#if DEFINE_TRT_RUNTIME_ENTRYPOINT + return nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()); +#else + return {}; +#endif +} + +nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine) +{ +#if DEFINE_TRT_REFITTER_ENTRYPOINT + return nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger()); +#else + return {}; +#endif +} + +nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network) +{ +#if DEFINE_TRT_ONNX_PARSER_ENTRYPOINT + return nvonnxparser::createParser(network, sample::gLogger.getTRTLogger()); +#else + return {}; +#endif +} + +#endif // DEFINE_TRT_ENTRYPOINTS + +#endif // TRT_SAMPLE_ENTRYPOINTS_H diff --git a/src/Detector/tensorrt_yolo/common/sampleInference.cpp b/src/Detector/tensorrt_yolo/common/sampleInference.cpp index 51f16882..ca0098d4 100644 --- a/src/Detector/tensorrt_yolo/common/sampleInference.cpp +++ b/src/Detector/tensorrt_yolo/common/sampleInference.cpp @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -23,6 +24,8 @@ #include #include #include +#include +#include #include #include #include @@ -35,6 +38,7 @@ #include "NvInfer.h" #include "ErrorRecorder.h" +#include "bfloat16.h" #include "logger.h" #include "sampleDevice.h" #include "sampleEngines.h" @@ -42,22 +46,23 @@ #include "sampleOptions.h" #include "sampleReporting.h" #include "sampleUtils.h" - +using namespace nvinfer1; namespace sample { -template -bool validateTensorNames( - const MapType& map, const EngineType* engine, const int32_t endBindingIndex) +template +bool validateTensorNames(TMapType const& map, TEngineType const* engine, int32_t const endBindingIndex) { // Check if the provided input tensor names match the input tensors of the engine. 
// Throw an error if the provided input tensor names cannot be found because it implies a potential typo. - for (const auto& item : map) + for (auto const& item : map) { bool tensorNameFound{false}; for (int32_t b = 0; b < endBindingIndex; ++b) { - if (engine->bindingIsInput(b) && engine->getBindingName(b) == item.first) + auto const tensorName = engine->getIOTensorName(b); + auto const tensorIOMode = engine->getTensorIOMode(tensorName); + if (tensorIOMode == nvinfer1::TensorIOMode::kINPUT && matchStringWithOneWildcard(item.first, tensorName)) { tensorNameFound = true; break; @@ -73,74 +78,86 @@ bool validateTensorNames( return true; } -template +template class FillBindingClosure { private: using InputsMap = std::unordered_map; using BindingsVector = std::vector>; - EngineType const* engine; - ContextType const* context; + TEngineType const* mEngine; + nvinfer1::IExecutionContext const* mContext; InputsMap const& inputs; BindingsVector& bindings; int32_t batch; int32_t endBindingIndex; + int32_t profileIndex; - void fillOneBinding(int32_t bindingIndex, int64_t vol) + void fillOneBinding(TensorInfo const& tensorInfo) { - auto const dims = getDims(bindingIndex); - auto const name = engine->getBindingName(bindingIndex); - auto const isInput = engine->bindingIsInput(bindingIndex); - auto const dataType = engine->getBindingDataType(bindingIndex); - auto const *bindingInOutStr = isInput ? "input" : "output"; + auto const name = tensorInfo.name; + auto const* bindingInOutStr = tensorInfo.isInput ? "Input" : "Output"; for (auto& binding : bindings) { - const auto input = inputs.find(name); - if (isInput && input != inputs.end()) + auto const input = findPlausible(inputs, name); + if (tensorInfo.isInput && input != inputs.end()) { sample::gLogInfo << "Using values loaded from " << input->second << " for input " << name << std::endl; - binding->addBinding(bindingIndex, name, isInput, vol, dataType, input->second); + binding->addBinding(tensorInfo, input->second); + } + else + { + if (tensorInfo.isInput) + { + sample::gLogInfo << "Using random values for input " << name << std::endl; + } + binding->addBinding(tensorInfo); + } + if (tensorInfo.isDynamic) + { + sample::gLogInfo << bindingInOutStr << " binding for " << name + << " is dynamic and will be created during execution using OutputAllocator." + << std::endl; } else { - sample::gLogInfo << "Using random values for " << bindingInOutStr << " " << name << std::endl; - binding->addBinding(bindingIndex, name, isInput, vol, dataType); + sample::gLogInfo << bindingInOutStr << " binding for " << name << " with dimensions " << tensorInfo.dims + << " is created." << std::endl; } - sample::gLogInfo << "Created " << bindingInOutStr <<" binding for " << name << " with dimensions " << dims << std::endl; } } bool fillAllBindings(int32_t batch, int32_t endBindingIndex) { - if (!validateTensorNames(inputs, engine, endBindingIndex)) + if (!validateTensorNames(inputs, mEngine, endBindingIndex)) { sample::gLogError << "Invalid tensor names found in --loadInputs flag." 
<< std::endl; return false; } - for (int32_t b = 0; b < endBindingIndex; b++) { - auto const dims = getDims(b); - auto const comps = engine->getBindingComponentsPerElement(b); - auto const strides = context->getStrides(b); - int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b); - auto const vol = volume(dims, strides, vectorDimIndex, comps, batch); - fillOneBinding(b, vol); + TensorInfo tensorInfo; + tensorInfo.bindingIndex = b; + getTensorInfo(tensorInfo); + tensorInfo.updateVolume(batch); + fillOneBinding(tensorInfo); } return true; } - nvinfer1::Dims getDims(int32_t bindingIndex); + void getTensorInfo(TensorInfo& tensorInfo); public: - FillBindingClosure(EngineType const* _engine, ContextType const* _context, InputsMap const& _inputs, BindingsVector& _bindings, int32_t _batch, int32_t _endBindingIndex) - : engine(_engine) - , context(_context) + FillBindingClosure(TEngineType const* _engine, nvinfer1::IExecutionContext const* _context, + InputsMap const& _inputs, BindingsVector& _bindings, int32_t _batch, int32_t _endBindingIndex, + int32_t _profileIndex) + : mEngine(_engine) + , mContext(_context) , inputs(_inputs) , bindings(_bindings) , batch(_batch) , endBindingIndex(_endBindingIndex) + , profileIndex(_profileIndex) { } @@ -151,172 +168,364 @@ class FillBindingClosure }; template <> -nvinfer1::Dims FillBindingClosure::getDims(int32_t bindingIndex) +void FillBindingClosure::getTensorInfo(TensorInfo& tensorInfo) { - return context->getBindingDimensions(bindingIndex); + auto const b = tensorInfo.bindingIndex; + auto const name = mEngine->getIOTensorName(b); + tensorInfo.name = name; + tensorInfo.dims = mContext->getTensorShape(name); + tensorInfo.isDynamic = std::any_of( + tensorInfo.dims.d, tensorInfo.dims.d + tensorInfo.dims.nbDims, [](int32_t dim) { return dim == -1; }); + tensorInfo.comps = mEngine->getTensorComponentsPerElement(name, profileIndex); + tensorInfo.strides = mContext->getTensorStrides(name); + tensorInfo.vectorDimIndex = mEngine->getTensorVectorizedDim(name, profileIndex); + tensorInfo.isInput = mEngine->getTensorIOMode(name) == TensorIOMode::kINPUT; + tensorInfo.dataType = mEngine->getTensorDataType(name); } -template <> -nvinfer1::Dims FillBindingClosure::getDims(int32_t bindingIndex) +namespace { - return engine->getBindingDimensions(bindingIndex); +bool allocateContextMemory(InferenceEnvironment& iEnv, InferenceOptions const& inference) +{ + auto* engine = iEnv.engine.get(); + iEnv.deviceMemory.resize(inference.infStreams); + // Delay context memory allocation until input shapes are specified because runtime allocation would require actual + // input shapes. + for (int32_t i = 0; i < inference.infStreams; ++i) + { + auto const& ec = iEnv.contexts.at(i); + if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kSTATIC) + { + sample::gLogInfo << "Created execution context with device memory size: " + << (engine->getDeviceMemorySize() / 1.0_MiB) << " MiB" << std::endl; + } + else + { + size_t sizeToAlloc{0}; + const char* allocReason{nullptr}; + if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kPROFILE) + { + auto const p = inference.optProfileIndex; + sizeToAlloc = engine->getDeviceMemorySizeForProfile(p); + allocReason = "current profile"; + } + else if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kRUNTIME) + { + sizeToAlloc = ec->updateDeviceMemorySizeForShapes(); + allocReason = "current input shapes"; + } + else + { + sample::gLogError << "Unrecognizable memory allocation strategy." 
<< std::endl; + return false; + } + iEnv.deviceMemory.at(i) = TrtDeviceBuffer(sizeToAlloc); + ec->setDeviceMemoryV2(iEnv.deviceMemory.at(i).get(), iEnv.deviceMemory.at(i).getSize()); + sample::gLogInfo << "Maximum device memory size across all profiles: " + << (engine->getDeviceMemorySizeV2() / 1.0_MiB) << " MiB" << std::endl; + sample::gLogInfo << "Only allocated device memory enough for " << allocReason << ": " + << (sizeToAlloc / 1.0_MiB) << " MiB" << std::endl; + } + } + return true; } +} // namespace -bool setUpInference(InferenceEnvironment& iEnv, const InferenceOptions& inference) +bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inference, SystemOptions const& system) { +#if TRT_WINML + int32_t const isIntegrated{}; +#else int32_t device{}; cudaCheck(cudaGetDevice(&device)); cudaDeviceProp properties; cudaCheck(cudaGetDeviceProperties(&properties, device)); + int32_t const isIntegrated{properties.integrated}; +#endif // Use managed memory on integrated devices when transfers are skipped // and when it is explicitly requested on the commandline. - bool useManagedMemory{(inference.skipTransfers && properties.integrated) || inference.useManaged}; - using FillSafeBindings = FillBindingClosure; - if (iEnv.safe) + bool useManagedMemory{(inference.skipTransfers && isIntegrated) || inference.useManaged}; + SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); + + using FillStdBindings = FillBindingClosure; + + auto* engine = iEnv.engine.get(); + SMP_RETVAL_IF_FALSE(engine != nullptr, "Got invalid engine!", false, sample::gLogError); + + // Release serialized blob to save memory space. + iEnv.engine.releaseBlob(); + + // Setup weight streaming if enabled + if (engine->getStreamableWeightsSize() > 0) { - ASSERT(sample::hasSafeRuntime()); - auto* safeEngine = iEnv.safeEngine.get(); - for (int32_t s = 0; s < inference.streams; ++s) + auto const& budget = inference.weightStreamingBudget; + int64_t wsBudget = budget.bytes; + if (budget.percent != 100.0) + { + double const percent = budget.percent; + ASSERT(percent < 100.0); + auto const max = engine->getStreamableWeightsSize(); + wsBudget = (max >= 0) ? (percent / 100) * (max) : WeightStreamingBudget::kDISABLE; + } + + if (wsBudget == WeightStreamingBudget::kDISABLE) { - iEnv.safeContext.emplace_back(safeEngine->createExecutionContext()); - iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); + wsBudget = engine->getStreamableWeightsSize(); } - const int32_t nBindings = safeEngine->getNbBindings(); - auto const* safeContext = iEnv.safeContext.front().get(); - // batch is set to 1 because safety only support explicit batch. - return FillSafeBindings(iEnv.safeEngine.get(), safeContext, inference.inputs, iEnv.bindings, 1, nBindings)(); + else if (wsBudget == WeightStreamingBudget::kAUTOMATIC) + { + wsBudget = engine->getWeightStreamingAutomaticBudget(); + } + ASSERT(wsBudget >= 0); + bool success = engine->setWeightStreamingBudgetV2(wsBudget); + SMP_RETVAL_IF_FALSE(success, "Failed to set weight streaming limit!", false, sample::gLogError); + switch (wsBudget) + { + case WeightStreamingBudget::kDISABLE: + { + sample::gLogInfo << "Weight streaming has been disabled at runtime." << std::endl; + break; + } + + case WeightStreamingBudget::kAUTOMATIC: + { + sample::gLogInfo << "The weight streaming budget will automatically be chosen by TensorRT." 
<< std::endl; + break; + } + default: + { + sample::gLogInfo << "Weight streaming is enabled with a device memory limit of " << wsBudget << " bytes." + << std::endl; + break; + } + } + } + + int32_t const nbOptProfiles = engine->getNbOptimizationProfiles(); + + if (inference.optProfileIndex >= nbOptProfiles) + { + sample::gLogError << "Selected profile index " << inference.optProfileIndex + << " exceeds the number of profiles that the engine holds. " << std::endl; + return false; } - using FillStdBindings = FillBindingClosure; + if (nbOptProfiles > 1 && !inference.setOptProfile) + { + sample::gLogWarning << nbOptProfiles + << " profiles detected but not set. Running with profile 0. Please use " + "--dumpOptimizationProfile to see all available profiles." + << std::endl; + } + + cudaStream_t setOptProfileStream; + CHECK(cudaStreamCreate(&setOptProfileStream)); - for (int32_t s = 0; s < inference.streams; ++s) + for (int32_t s = 0; s < inference.infStreams; ++s) { - auto ec = iEnv.engine->createExecutionContext(); + IExecutionContext* ec{nullptr}; + if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kSTATIC) + { + // Let TRT pre-allocate and manage the memory. + ec = engine->createExecutionContext(); + } + else + { + // Allocate based on the current profile or runtime shapes. + ec = engine->createExecutionContext(ExecutionContextAllocationStrategy::kUSER_MANAGED); + } if (ec == nullptr) { sample::gLogError << "Unable to create execution context for stream " << s << "." << std::endl; return false; } - iEnv.context.emplace_back(ec); + ec->setNvtxVerbosity(inference.nvtxVerbosity); + +#if !TRT_WINML + int32_t const persistentCacheLimit + = samplesCommon::getMaxPersistentCacheSize() * inference.persistentCacheRatio; + sample::gLogInfo << "Setting persistentCacheLimit to " << persistentCacheLimit << " bytes." << std::endl; + ec->setPersistentCacheLimit(persistentCacheLimit); +#endif + + auto setProfile = ec->setOptimizationProfileAsync(inference.optProfileIndex, setOptProfileStream); + CHECK(cudaStreamSynchronize(setOptProfileStream)); + + if (!setProfile) + { + sample::gLogError << "Set optimization profile failed. " << std::endl; + if (inference.infStreams > 1) + { + sample::gLogError + << "Please ensure that the engine is built with preview feature profileSharing0806 enabled. " + << std::endl; + } + return false; + } + + iEnv.contexts.emplace_back(ec); iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); } + + CHECK(cudaStreamDestroy(setOptProfileStream)); + if (iEnv.profiler) { - iEnv.context.front()->setProfiler(iEnv.profiler.get()); + iEnv.contexts.front()->setProfiler(iEnv.profiler.get()); // Always run reportToProfiler() after enqueue launch - iEnv.context.front()->setEnqueueEmitsProfile(false); + iEnv.contexts.front()->setEnqueueEmitsProfile(false); } - const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles(); - const int32_t nBindings = iEnv.engine->getNbBindings(); - const int32_t bindingsInProfile = nOptProfiles > 0 ? nBindings / nOptProfiles : 0; - const int32_t endBindingIndex = bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings(); - - if (nOptProfiles > 1) - { - sample::gLogWarning << "Multiple profiles are currently not supported. Running with one profile." << std::endl; - } + int32_t const endBindingIndex = engine->getNbIOTensors(); // Make sure that the tensor names provided in command-line args actually exist in any of the engine bindings // to avoid silent typos. 
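// (The keys of inference.shapes come from the command line; they are matched against engine
//  tensor names with matchStringWithOneWildcard()/findPlausible(), so a provided name may
//  contain a single wildcard. Example, assuming trtexec-style syntax and a hypothetical
//  input tensor named "images": --shapes=images:1x3x640x640.)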
- if (!validateTensorNames(inference.shapes, iEnv.engine.get(), endBindingIndex)) + if (!validateTensorNames(inference.shapes, engine, endBindingIndex)) { sample::gLogError << "Invalid tensor names found in --shapes flag." << std::endl; return false; } - // Set all input dimensions before all bindings can be allocated for (int32_t b = 0; b < endBindingIndex; ++b) { - if (iEnv.engine->bindingIsInput(b)) + auto const& name = engine->getIOTensorName(b); + auto const& mode = engine->getTensorIOMode(name); + if (mode == TensorIOMode::kINPUT) { - auto dims = iEnv.context.front()->getBindingDimensions(b); - const bool isScalar = dims.nbDims == 0; - const bool isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }) - || iEnv.engine->isShapeBinding(b); - if (isDynamicInput) + Dims const dims = iEnv.contexts.front()->getTensorShape(name); + bool isShapeInferenceIO{false}; + isShapeInferenceIO = engine->isShapeInferenceIO(name); + bool const hasRuntimeDim = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }); + auto const shape = findPlausible(inference.shapes, name); + if (hasRuntimeDim || isShapeInferenceIO) { - auto shape = inference.shapes.find(iEnv.engine->getBindingName(b)); + // Set shapeData to either dimensions of the input (if it has a dynamic shape) + // or set to values of the input (if it is an input shape tensor). + std::vector shapeData; - std::vector staticDims; if (shape == inference.shapes.end()) { - // If no shape is provided, set dynamic dimensions to 1. - constexpr int32_t DEFAULT_DIMENSION = 1; - if (iEnv.engine->isShapeBinding(b)) + // No information provided. Use default value for missing data. + constexpr int32_t kDEFAULT_VALUE = 1; + if (isShapeInferenceIO) { - if (isScalar) - { - staticDims.push_back(1); - } - else - { - staticDims.resize(dims.d[0]); - std::fill(staticDims.begin(), staticDims.end(), DEFAULT_DIMENSION); - } + // Set shape tensor to all ones. + shapeData.assign(volume(dims, 0, dims.nbDims), kDEFAULT_VALUE); + sample::gLogWarning << "Values missing for input shape tensor: " << name + << "Automatically setting values to: " << shapeData << std::endl; } else { - staticDims.resize(dims.nbDims); - std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), - [&](int32_t dimension) { return dimension >= 0 ? dimension : DEFAULT_DIMENSION; }); + // Use default value for unspecified runtime dimensions. + shapeData.resize(dims.nbDims); + std::transform(dims.d, dims.d + dims.nbDims, shapeData.begin(), + [&](int32_t dimension) { return dimension >= 0 ? dimension : kDEFAULT_VALUE; }); + sample::gLogWarning << "Shape missing for input with dynamic shape: " << name + << "Automatically setting shape to: " << shapeData << std::endl; } - sample::gLogWarning << "Dynamic dimensions required for input: " << iEnv.engine->getBindingName(b) - << ", but no shapes were provided. Automatically overriding shape to: " - << staticDims << std::endl; } - else if (inference.inputs.count(shape->first) && iEnv.engine->isShapeBinding(b)) + else if (inference.inputs.count(shape->first) && isShapeInferenceIO) { - if (isScalar || dims.nbDims == 1) - { - // Load shape tensor from file. - size_t const size = isScalar ? 
1 : dims.d[0]; - staticDims.resize(size); - auto const& filename = inference.inputs.at(shape->first); - auto dst = reinterpret_cast(staticDims.data()); - loadFromFile(filename, dst, size * sizeof(decltype(staticDims)::value_type)); - } - else - { - sample::gLogWarning << "Cannot load shape tensor " << shape->first << " from file, " - << "ND-Shape isn't supported yet" << std::endl; - // Fallback - staticDims = shape->second; - } + // Load shape tensor from file. + int64_t const size = volume(dims, 0, dims.nbDims); + shapeData.resize(size); + auto const& filename = inference.inputs.at(shape->first); + auto dst = reinterpret_cast(shapeData.data()); + loadFromFile(filename, dst, size * sizeof(decltype(shapeData)::value_type)); } else { - staticDims = shape->second; + shapeData = shape->second; + } + + int32_t* shapeTensorData{nullptr}; + if (isShapeInferenceIO) + { + // Save the data in iEnv, in a way that it's address does not change + // before enqueueV3 is called. + iEnv.inputShapeTensorValues.emplace_back(shapeData); + shapeTensorData = iEnv.inputShapeTensorValues.back().data(); } - for (auto& c : iEnv.context) + for (auto& c : iEnv.contexts) { - if (iEnv.engine->isShapeBinding(b)) + if (isShapeInferenceIO) { - if (!c->setInputShapeBinding(b, staticDims.data())) + sample::gLogInfo << "Set input shape tensor " << name << " to: " << shapeData << std::endl; + if (!c->setTensorAddress(name, shapeTensorData)) { return false; } } else { - if (!c->setBindingDimensions(b, toDims(staticDims))) + sample::gLogInfo << "Set shape of input tensor " << name << " to: " << shapeData + << std::endl; + if (!c->setInputShape(name, toDims(shapeData))) { return false; } } } } + else if (nbOptProfiles && shape != inference.shapes.end()) + { + // Check if the provided shape matches the static dimensions in the engine. + for (auto& c : iEnv.contexts) + { + if (!c->setInputShape(name, toDims(shape->second))) + { + sample::gLogError << "The engine was built with static shapes for input tensor " << name + << " but the provided shapes do not match the static shapes!" << std::endl; + return false; + } + } + } } } - auto* engine = iEnv.engine.get(); - auto const* context = iEnv.context.front().get(); - int32_t const batch = engine->hasImplicitBatchDimension() ? inference.batch : 1; - return FillStdBindings(engine, context, inference.inputs, iEnv.bindings, batch, endBindingIndex)(); + // Create Debug Listener and turn on debug states if client requested dumping debug tensors. 
+ if (!inference.debugTensorFileNames.empty()) + { + iEnv.listener.reset(new DebugTensorWriter(inference.debugTensorFileNames)); + iEnv.contexts.front()->setDebugListener(iEnv.listener.get()); + for (auto const& s : inference.debugTensorFileNames) + { + iEnv.contexts.front()->setTensorDebugState(s.first.c_str(), true); + } + } + + if (!allocateContextMemory(iEnv, inference)) + { + return false; + } + + auto const* context = iEnv.contexts.front().get(); + return FillStdBindings( + engine, context, inference.inputs, iEnv.bindings, 1, endBindingIndex, inference.optProfileIndex)(); } +TaskInferenceEnvironment::TaskInferenceEnvironment( + std::string engineFile, InferenceOptions inference, int32_t deviceId, int32_t DLACore, int32_t bs) + : iOptions(inference) + , device(deviceId) + , batch(bs) +{ + BuildEnvironment bEnv(/* isSafe */ false, /* versionCompatible */ false, DLACore, "", getTempfileControlDefaults()); + loadEngineToBuildEnv(engineFile, bEnv, sample::gLogError); + std::unique_ptr tmp(new InferenceEnvironment(bEnv)); + iEnv = std::move(tmp); + + cudaCheck(cudaSetDevice(device)); + SystemOptions system{}; + system.device = device; + system.DLACore = DLACore; + if (!setUpInference(*iEnv, iOptions, system)) + { + sample::gLogError << "Inference set up failed" << std::endl; + } +} namespace { @@ -353,74 +562,60 @@ struct SyncStruct struct Enqueue { - explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers) + explicit Enqueue(nvinfer1::IExecutionContext& context) : mContext(context) - , mBuffers(buffers) { } nvinfer1::IExecutionContext& mContext; - void** mBuffers{}; }; //! -//! \class EnqueueImplicit -//! \brief Functor to enqueue inference with implict batch +//! \class EnqueueExplicit +//! \brief Functor to enqueue inference with explict batch //! -class EnqueueImplicit : private Enqueue +class EnqueueExplicit : private Enqueue { public: - explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers, int32_t batch) - : Enqueue(context, buffers) - , mBatch(batch) + explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, Bindings const& bindings) + : Enqueue(context) + , mBindings(bindings) { + ASSERT(mBindings.setTensorAddresses(mContext)); } bool operator()(TrtCudaStream& stream) const { - if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) + try { - // Collecting layer timing info from current profile index of execution context - if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && !mContext.reportToProfiler()) + bool const result = mContext.enqueueV3(stream.get()); + // Collecting layer timing info from current profile index of execution context, except under capturing + // mode. + if (!isStreamCapturing(stream) && mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() + && !mContext.reportToProfiler()) { - gLogWarning << "Failed to collect layer timing info from previous enqueue()" << std::endl; + gLogWarning << "Failed to collect layer timing info from previous enqueueV3()" << std::endl; } - return true; + return result; + } + catch (const std::exception&) + { + return false; } return false; } private: - int32_t mBatch; -}; - -//! -//! \class EnqueueExplicit -//! \brief Functor to enqueue inference with explict batch -//! -class EnqueueExplicit : private Enqueue -{ - -public: - explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers) - : Enqueue(context, buffers) + // Helper function to check if a stream is in capturing mode. 
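    // Per-layer profiling is skipped while the stream is capturing: collecting timing via
    // reportToProfiler() generally requires synchronizing with the enqueued work, which is
    // not permitted while a CUDA graph is being captured.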
+ bool isStreamCapturing(TrtCudaStream& stream) const { + cudaStreamCaptureStatus status{cudaStreamCaptureStatusNone}; + cudaCheck(cudaStreamIsCapturing(stream.get(), &status)); + return status != cudaStreamCaptureStatusNone; } - bool operator()(TrtCudaStream& stream) const - { - if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) - { - // Collecting layer timing info from current profile index of execution context - if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && !mContext.reportToProfiler()) - { - gLogWarning << "Failed to collect layer timing info from previous enqueueV2()" << std::endl; - } - return true; - } - return false; - } + Bindings const& mBindings; }; //! @@ -442,7 +637,7 @@ class EnqueueGraph if (mGraph.launch(stream)) { // Collecting layer timing info from current profile index of execution context - if (mContext.getProfiler() && !mContext.reportToProfiler()) + if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && !mContext.reportToProfiler()) { gLogWarning << "Failed to collect layer timing info from previous CUDA graph launch" << std::endl; } @@ -456,29 +651,24 @@ class EnqueueGraph }; //! -//! \class EnqueueSafe -//! \brief Functor to enqueue safe execution context +//! \class EnqueueGraphSafe +//! \brief Functor to enqueue inference from CUDA Graph //! -class EnqueueSafe +class EnqueueGraphSafe { + public: - explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context, void** buffers) - : mContext(context) - , mBuffers(buffers) + explicit EnqueueGraphSafe(TrtCudaGraph& graph) + : mGraph(graph) { } bool operator()(TrtCudaStream& stream) const { - if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) - { - return true; - } - return false; + return mGraph.launch(stream); } - nvinfer1::safe::IExecutionContext& mContext; - void** mBuffers{}; + TrtCudaGraph& mGraph; }; using EnqueueFunction = std::function; @@ -512,12 +702,11 @@ using EnqueueTimes = std::array; //! \class Iteration //! \brief Inference iteration and streams management //! 
-template class Iteration { public: - Iteration(int32_t id, const InferenceOptions& inference, ContextType& context, Bindings& bindings) + Iteration(int32_t id, InferenceOptions const& inference, nvinfer1::IExecutionContext& context, Bindings& bindings) : mBindings(bindings) , mStreamId(id) , mDepth(1 + inference.overlap) @@ -546,7 +735,7 @@ class Iteration if (!skipTransfers) { record(EventType::kINPUT_S, StreamType::kINPUT); - mBindings.transferInputToDevice(getStream(StreamType::kINPUT)); + setInputData(false); record(EventType::kINPUT_E, StreamType::kINPUT); wait(EventType::kINPUT_E, StreamType::kCOMPUTE); // Wait for input DMA before compute } @@ -564,7 +753,7 @@ class Iteration { wait(EventType::kCOMPUTE_E, StreamType::kOUTPUT); // Wait for compute before output DMA record(EventType::kOUTPUT_S, StreamType::kOUTPUT); - mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT)); + fetchOutputData(false); record(EventType::kOUTPUT_E, StreamType::kOUTPUT); } @@ -574,7 +763,7 @@ class Iteration } float sync( - const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, std::vector& trace, bool skipTransfers) + TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, std::vector& trace, bool skipTransfers) { if (mActive[mNext]) { @@ -594,7 +783,7 @@ class Iteration } void syncAll( - const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, std::vector& trace, bool skipTransfers) + TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, std::vector& trace, bool skipTransfers) { for (int32_t d = 0; d < mDepth; ++d) { @@ -608,14 +797,24 @@ class Iteration getStream(StreamType::kINPUT).wait(gpuStart); } - void setInputData() + void setInputData(bool sync) { mBindings.transferInputToDevice(getStream(StreamType::kINPUT)); + // additional sync to avoid overlapping with inference execution. + if (sync) + { + getStream(StreamType::kINPUT).synchronize(); + } } - void fetchOutputData() + void fetchOutputData(bool sync) { mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT)); + // additional sync to avoid overlapping with inference execution. + if (sync) + { + getStream(StreamType::kOUTPUT).synchronize(); + } } private: @@ -655,12 +854,16 @@ class Iteration getStream(s).wait(getEvent(e)); } - InferenceTrace getTrace(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, bool skipTransfers) + InferenceTrace getTrace(TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, bool skipTransfers) { - float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_S) - gpuStart; - float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_E) - gpuStart; - float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_S) - gpuStart; - float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_E) - gpuStart; + float is + = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_S) - gpuStart; + float ie + = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_E) - gpuStart; + float os + = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_S) - gpuStart; + float oe + = skipTransfers ? 
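The Iteration class above brackets input DMA, compute, and output DMA with CUDA events on separate streams so that per-phase timings can be read back for the trace. Stripped of the TensorRT specifics, the event pattern looks like this (a sketch; the measured work is left as a comment):

```cpp
// Sketch: time one phase on a CUDA stream with events, as the Iteration bookkeeping does.
#include <cuda_runtime_api.h>

float timePhase(cudaStream_t stream)
{
    cudaEvent_t start{}, stop{};
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start, stream);
    // ... enqueue the work to measure here (H2D copy, enqueueV3, or D2H copy) ...
    cudaEventRecord(stop, stream);

    cudaEventSynchronize(stop); // Wait until the phase has finished on the GPU.
    float ms = 0.F;
    cudaEventElapsedTime(&ms, start, stop);

    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    return ms;
}
```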
getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_E) - gpuStart; return InferenceTrace(mStreamId, std::chrono::duration(getEnqueueTime(true) - cpuStart).count(), @@ -668,19 +871,22 @@ class Iteration getEvent(EventType::kCOMPUTE_S) - gpuStart, getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe); } - void createEnqueueFunction(const InferenceOptions& inference, nvinfer1::IExecutionContext& context, Bindings& /*bindings*/) + void createEnqueueFunction( + InferenceOptions const& inference, nvinfer1::IExecutionContext& context, Bindings& bindings) { - if (inference.batch) - mEnqueue = EnqueueFunction(EnqueueImplicit(context, mBindings.getDeviceBuffers(), inference.batch)); - else - mEnqueue = EnqueueFunction(EnqueueExplicit(context, mBindings.getDeviceBuffers())); - + mEnqueue = EnqueueFunction(EnqueueExplicit(context, mBindings)); if (inference.graph) { + sample::gLogInfo << "Capturing CUDA graph for the current execution context" << std::endl; + TrtCudaStream& stream = getStream(StreamType::kCOMPUTE); - // Avoid capturing initialization calls by executing the enqueue function at least once before starting CUDA graph capture. - const auto ret = mEnqueue(stream); - assert(ret); + // Avoid capturing initialization calls by executing the enqueue function at least + // once before starting CUDA graph capture. + auto const ret = mEnqueue(stream); + if (!ret) + { + throw std::runtime_error("Inference enqueue failed."); + } stream.synchronize(); mGraph.beginCapture(stream); @@ -690,6 +896,7 @@ class Iteration { mGraph.endCapture(stream); mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph)); + sample::gLogInfo << "Successfully captured CUDA graph for the current execution context" << std::endl; } else { @@ -706,11 +913,6 @@ class Iteration } } - void createEnqueueFunction(const InferenceOptions&, nvinfer1::safe::IExecutionContext& context, Bindings&) - { - mEnqueue = EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers())); - } - Bindings& mBindings; TrtCudaGraph mGraph; @@ -726,23 +928,44 @@ class Iteration int32_t enqueueStart{0}; std::vector mEnqueueTimes; - ContextType* mContext{nullptr}; + nvinfer1::IExecutionContext* mContext{nullptr}; }; -template -bool inferenceLoop(std::vector>>& iStreams, const TimePoint& cpuStart, - const TrtCudaEvent& gpuStart, int iterations, float maxDurationMs, float warmupMs, +bool inferenceLoop(std::vector>& iStreams, TimePoint const& cpuStart, + TrtCudaEvent const& gpuStart, int iterations, float maxDurationMs, float warmupMs, std::vector& trace, bool skipTransfers, float idleMs) { float durationMs = 0; int32_t skip = 0; + if (maxDurationMs == -1.F) + { + sample::gLogWarning << "--duration=-1 is specified, inference will run in an endless loop until" + << " aborted with CTRL-C (SIGINT)" << std::endl; + while (true) + { + for (auto& s : iStreams) + { + if (!s->query(skipTransfers)) + { + return false; + } + } + for (auto& s : iStreams) + { + s->sync(cpuStart, gpuStart, trace, skipTransfers); + } + } + } + for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs; ++i) { for (auto& s : iStreams) { if (!s->query(skipTransfers)) + { return false; + } } for (auto& s : iStreams) { @@ -751,12 +974,15 @@ bool inferenceLoop(std::vector>>& iStream if (durationMs < warmupMs) // Warming up { if (durationMs) // Skip complete iterations + { ++skip; - + } continue; } if (idleMs != 0.F) + { std::this_thread::sleep_for(std::chrono::duration(idleMs)); + } } for (auto& s : iStreams) { @@ -765,74 +991,81 @@ bool 
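When CUDA graphs are requested, createEnqueueFunction above runs one warm-up enqueue, captures the next enqueueV3 call into a graph, and swaps the enqueue functor for a graph launch. Roughly the same sequence with the raw CUDA graph API, as a sketch with most error handling omitted; `context` and `stream` are assumed to be fully set up:

```cpp
// Sketch: capture one enqueueV3 call into a CUDA graph and replay it.
#include <NvInfer.h>
#include <cuda_runtime_api.h>

bool captureAndLaunch(nvinfer1::IExecutionContext& context, cudaStream_t stream)
{
    // Warm-up outside capture so lazy initialization is not recorded into the graph.
    if (!context.enqueueV3(stream))
    {
        return false;
    }
    cudaStreamSynchronize(stream);

    cudaGraph_t graph{};
    cudaGraphExec_t graphExec{};
    cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
    bool const enqueued = context.enqueueV3(stream); // Recorded into the graph, not executed.
    cudaStreamEndCapture(stream, &graph);
    if (!enqueued)
    {
        return false;
    }
    // cudaGraphInstantiateWithFlags is available from CUDA 11.4 onwards.
    if (cudaGraphInstantiateWithFlags(&graphExec, graph, 0) != cudaSuccess)
    {
        return false;
    }
    cudaGraphDestroy(graph);

    // Each replay re-runs the captured enqueue with minimal launch overhead.
    return cudaGraphLaunch(graphExec, stream) == cudaSuccess;
}
```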
inferenceLoop(std::vector>>& iStream return true; } -template -void inferenceExecution(const InferenceOptions& inference, InferenceEnvironment& iEnv, SyncStruct& sync, - const int32_t threadIdx, const int32_t streamsPerThread, int32_t device, std::vector& trace) +void inferenceExecution(InferenceOptions const& inference, InferenceEnvironment& iEnv, SyncStruct& sync, + int32_t const threadIdx, int32_t const streamsPerThread, int32_t device, + std::vector& trace) noexcept { - float warmupMs = inference.warmup; - float durationMs = inference.duration * 1000.F + warmupMs; - - cudaCheck(cudaSetDevice(device)); - - std::vector>> iStreams; - - for (int32_t s = 0; s < streamsPerThread; ++s) + try { - const int32_t streamId{threadIdx * streamsPerThread + s}; - auto* iteration = new Iteration( - streamId, inference, *iEnv.template getContext(streamId), *iEnv.bindings[streamId]); - if (inference.skipTransfers) + float warmupMs = inference.warmup; + float durationMs = -1.F; + if (inference.duration != -1.F) { - iteration->setInputData(); + durationMs = inference.duration * 1000.F + warmupMs; } - iStreams.emplace_back(iteration); - } - for (auto& s : iStreams) - { - s->wait(sync.gpuStart); - } + cudaCheck(cudaSetDevice(device)); - std::vector localTrace; - if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart, inference.iterations, durationMs, warmupMs, localTrace, - inference.skipTransfers, inference.idle)) - { - iEnv.error = true; - } + std::vector> iStreams; + + for (int32_t s = 0; s < streamsPerThread; ++s) + { + int32_t const streamId{threadIdx * streamsPerThread + s}; + auto* iteration = new Iteration(streamId, inference, *iEnv.getContext(streamId), *iEnv.bindings[streamId]); + if (inference.skipTransfers) + { + iteration->setInputData(true); + } + iStreams.emplace_back(iteration); + } - if (inference.skipTransfers) - { for (auto& s : iStreams) { - s->fetchOutputData(); + s->wait(sync.gpuStart); } - } - sync.mutex.lock(); - trace.insert(trace.end(), localTrace.begin(), localTrace.end()); - sync.mutex.unlock(); -} + std::vector localTrace; + if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart, inference.iterations, durationMs, warmupMs, + localTrace, inference.skipTransfers, inference.idle)) + { + sync.mutex.lock(); + iEnv.error = true; + sync.mutex.unlock(); + } -inline std::thread makeThread(const InferenceOptions& inference, InferenceEnvironment& iEnv, SyncStruct& sync, - int32_t threadIdx, int32_t streamsPerThread, int32_t device, std::vector& trace) -{ + if (inference.skipTransfers) + { + for (auto& s : iStreams) + { + s->fetchOutputData(true); + } + } - if (iEnv.safe) + sync.mutex.lock(); + trace.insert(trace.end(), localTrace.begin(), localTrace.end()); + sync.mutex.unlock(); + } + catch (...) 
{ - ASSERT(sample::hasSafeRuntime()); - return std::thread(inferenceExecution, std::cref(inference), std::ref(iEnv), - std::ref(sync), threadIdx, streamsPerThread, device, std::ref(trace)); + sync.mutex.lock(); + iEnv.error = true; + sync.mutex.unlock(); } +} - return std::thread(inferenceExecution, std::cref(inference), std::ref(iEnv), - std::ref(sync), threadIdx, streamsPerThread, device, std::ref(trace)); +inline std::thread makeThread(InferenceOptions const& inference, InferenceEnvironment& iEnv, SyncStruct& sync, + int32_t threadIdx, int32_t streamsPerThread, int32_t device, std::vector& trace) +{ + return std::thread(inferenceExecution, std::cref(inference), std::ref(iEnv), std::ref(sync), threadIdx, + streamsPerThread, device, std::ref(trace)); } } // namespace bool runInference( - const InferenceOptions& inference, InferenceEnvironment& iEnv, int32_t device, std::vector& trace) + InferenceOptions const& inference, InferenceEnvironment& iEnv, int32_t device, std::vector& trace) { + SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); cudaCheck(cudaProfilerStart()); trace.resize(0); @@ -846,8 +1079,8 @@ bool runInference( // When multiple streams are used, trtexec can run inference in two modes: // (1) if inference.threads is true, then run each stream on each thread. // (2) if inference.threads is false, then run all streams on the same thread. - const int32_t numThreads = inference.threads ? inference.streams : 1; - const int32_t streamsPerThread = inference.threads ? 1 : inference.streams; + int32_t const numThreads = inference.threads ? inference.infStreams : 1; + int32_t const streamsPerThread = inference.threads ? 1 : inference.infStreams; std::vector threads; for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) @@ -861,12 +1094,47 @@ bool runInference( cudaCheck(cudaProfilerStop()); - auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) { return a.h2dStart < b.h2dStart; }; + auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; }; std::sort(trace.begin(), trace.end(), cmpTrace); return !iEnv.error; } +bool runMultiTasksInference(std::vector>& tEnvList) +{ + cudaCheck(cudaProfilerStart()); + cudaSetDeviceFlags(cudaDeviceScheduleSpin); + + SyncStruct sync; + sync.sleep = 0; + sync.mainStream.sleep(&sync.sleep); + sync.cpuStart = getCurrentTime(); + sync.gpuStart.record(sync.mainStream); + + std::vector threads; + for (size_t i = 0; i < tEnvList.size(); ++i) + { + auto& tEnv = tEnvList[i]; + threads.emplace_back(makeThread( + tEnv->iOptions, *(tEnv->iEnv), sync, /*threadIdx*/ 0, /*streamsPerThread*/ 1, tEnv->device, tEnv->trace)); + } + for (auto& th : threads) + { + th.join(); + } + + cudaCheck(cudaProfilerStop()); + + auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; }; + for (auto& tEnv : tEnvList) + { + std::sort(tEnv->trace.begin(), tEnv->trace.end(), cmpTrace); + } + + return std::none_of(tEnvList.begin(), tEnvList.end(), + [](std::unique_ptr& tEnv) { return tEnv->iEnv->error; }); +} + namespace { size_t reportGpuMemory() @@ -889,36 +1157,31 @@ size_t reportGpuMemory() } // namespace //! Returns true if deserialization is slower than expected or fails. 
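runInference above either dedicates one thread per stream or drives all streams from a single thread, depending on the threads option. The fan-out itself is plain std::thread plumbing; a sketch of that launching pattern, where worker() is only a stand-in for inferenceExecution:

```cpp
// Sketch: one worker thread per stream vs. all streams on one thread, as in runInference.
#include <cstdint>
#include <thread>
#include <vector>

// Stand-in for inferenceExecution(); the real worker drives one or more inference streams.
void worker(int32_t /*threadIdx*/, int32_t /*streamsPerThread*/) {}

void launchWorkers(bool oneThreadPerStream, int32_t numStreams)
{
    int32_t const numThreads = oneThreadPerStream ? numStreams : 1;
    int32_t const streamsPerThread = oneThreadPerStream ? 1 : numStreams;

    std::vector<std::thread> threads;
    for (int32_t t = 0; t < numThreads; ++t)
    {
        threads.emplace_back(worker, t, streamsPerThread);
    }
    for (auto& th : threads)
    {
        th.join();
    }
}
```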
-bool timeDeserialize(InferenceEnvironment& iEnv) +bool timeDeserialize(InferenceEnvironment& iEnv, SystemOptions const& sys) { constexpr int32_t kNB_ITERS{20}; - std::unique_ptr rt{nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())}; - std::unique_ptr engine; + std::unique_ptr rt{createRuntime()}; + std::unique_ptr engine; - std::unique_ptr safeRT{sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - std::unique_ptr safeEngine; - - if (iEnv.safe) - { - ASSERT(sample::hasSafeRuntime() && safeRT != nullptr); - safeRT->setErrorRecorder(&gRecorder); - } + SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); auto timeDeserializeFn = [&]() -> float { bool deserializeOK{false}; engine.reset(nullptr); - safeEngine.reset(nullptr); auto startClock = std::chrono::high_resolution_clock::now(); - if (iEnv.safe) - { - safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(), iEnv.engineBlob.size())); - deserializeOK = (safeEngine != nullptr); - } - else + SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); + + auto& reader = iEnv.engine.getFileReader(); + reader.reset(); + ASSERT(reader.isOpen()); +#if !TRT_WINML + for (auto const& pluginPath : sys.dynamicPlugins) { - engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(), iEnv.engineBlob.size())); - deserializeOK = (engine != nullptr); + rt->getPluginRegistry().loadLibrary(pluginPath.c_str()); } +#endif + engine.reset(rt->deserializeCudaEngine(reader)); + deserializeOK = (engine != nullptr); auto endClock = std::chrono::high_resolution_clock::now(); // return NAN if deserialization failed. return deserializeOK ? std::chrono::duration(endClock - startClock).count() : NAN; @@ -935,7 +1198,7 @@ bool timeDeserialize(InferenceEnvironment& iEnv) sample::gLogInfo << "Begin deserialization engine timing..." << std::endl; float const first = timeDeserializeFn(); - // Check if first deserialization suceeded. + // Check if first deserialization succeeded. if (std::isnan(first)) { sample::gLogError << "Engine deserialization failed." << std::endl; @@ -952,10 +1215,10 @@ bool timeDeserialize(InferenceEnvironment& iEnv) { totalTime += timeDeserializeFn(); } - const auto averageTime = totalTime / kNB_ITERS; + auto const averageTime = totalTime / kNB_ITERS; // reportGpuMemory sometimes reports zero after a single deserialization of a small engine, // so use the size of memory for all the iterations. - const auto totalEngineSizeGpu = reportGpuMemory(); + auto const totalEngineSizeGpu = reportGpuMemory(); sample::gLogInfo << "Total deserialization time = " << totalTime << " milliseconds in " << kNB_ITERS << " iterations, average time = " << averageTime << " milliseconds, first time = " << first << " milliseconds." << std::endl; @@ -965,8 +1228,8 @@ bool timeDeserialize(InferenceEnvironment& iEnv) // the average deserialization, return true, which means an error occurred. // The tolerance is set to 2x since the deserialization time is quick and susceptible // to caching issues causing problems in the first timing. 
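The timing in timeDeserialize is plain std::chrono around deserializeCudaEngine, repeated kNB_ITERS times so the first (cold) run can be compared against the average. A free-standing version of one timed deserialization, assuming the serialized engine already sits in a host buffer; blob and size are placeholders:

```cpp
// Sketch: measure how long one engine deserialization takes.
#include <NvInfer.h>
#include <chrono>
#include <cstddef>
#include <memory>

class SilentLogger : public nvinfer1::ILogger
{
public:
    void log(Severity, char const*) noexcept override {}
};

float timeOneDeserialization(void const* blob, std::size_t size)
{
    SilentLogger logger;
    std::unique_ptr<nvinfer1::IRuntime> runtime{nvinfer1::createInferRuntime(logger)};

    auto const start = std::chrono::high_resolution_clock::now();
    std::unique_ptr<nvinfer1::ICudaEngine> engine{runtime->deserializeCudaEngine(blob, size)};
    auto const end = std::chrono::high_resolution_clock::now();

    // Return -1 on failure, otherwise the elapsed time in milliseconds.
    return engine ? std::chrono::duration<float, std::milli>(end - start).count() : -1.F;
}
```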
- const auto tolerance = 2.0F; - const bool isSlowerThanExpected = first > averageTime * tolerance; + auto const tolerance = 2.0F; + bool const isSlowerThanExpected = first > averageTime * tolerance; if (isSlowerThanExpected) { sample::gLogInfo << "First deserialization time divided by average time is " << (first / averageTime) @@ -975,16 +1238,385 @@ bool timeDeserialize(InferenceEnvironment& iEnv) return isSlowerThanExpected; } -std::string getLayerInformation(const InferenceEnvironment& iEnv, nvinfer1::LayerInformationFormat format) +std::string getLayerInformation( + nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context, nvinfer1::LayerInformationFormat format) { - auto runtime = std::unique_ptr(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())); - auto inspector = std::unique_ptr(iEnv.engine->createEngineInspector()); - if (!iEnv.context.empty()) + auto runtime = std::unique_ptr{createRuntime()}; + auto inspector = std::unique_ptr(engine->createEngineInspector()); + if (context != nullptr) { - inspector->setExecutionContext(iEnv.context.front().get()); + inspector->setExecutionContext(context); } std::string result = inspector->getEngineInformation(format); return result; } +void Binding::fill(std::string const& fileName) +{ + loadFromFile(fileName, static_cast(buffer->getHostBuffer()), buffer->getSize()); +} + +void Binding::fill() +{ + switch (dataType) + { + case nvinfer1::DataType::kBOOL: + { + fillBuffer(buffer->getHostBuffer(), volume, 0, 1); + break; + } + case nvinfer1::DataType::kINT32: + { + fillBuffer(buffer->getHostBuffer(), volume, -128, 127); + break; + } + case nvinfer1::DataType::kINT64: + { + fillBuffer(buffer->getHostBuffer(), volume, -128, 127); + break; + } + case nvinfer1::DataType::kINT8: + { + fillBuffer(buffer->getHostBuffer(), volume, -128, 127); + break; + } + case nvinfer1::DataType::kFLOAT: + { + fillBuffer(buffer->getHostBuffer(), volume, -1.0F, 1.0F); + break; + } + case nvinfer1::DataType::kHALF: + { + fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F); + break; + } + case nvinfer1::DataType::kBF16: + { + fillBuffer(buffer->getHostBuffer(), volume, -1.0F, 1.0F); + break; + } + case nvinfer1::DataType::kUINT8: + { + fillBuffer(buffer->getHostBuffer(), volume, 0, 255); + break; + } + case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); + } +} + +void Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, int32_t spv, + std::string const separator /*= " "*/) const +{ + void* outputBuffer{}; + if (outputAllocator != nullptr) + { + outputBuffer = outputAllocator->getBuffer()->getHostBuffer(); + // Overwrite dimensions with those reported by the output allocator. + dims = outputAllocator->getFinalDims(); + os << "Final shape is " << dims << " reported by the output allocator." 
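getLayerInformation now receives the engine and an optional execution context directly and forwards them to IEngineInspector. The core of that call, as a sketch; passing a context is only needed when shapes are dynamic and should be resolved in the report:

```cpp
// Sketch: dump per-layer information from an engine with IEngineInspector.
#include <NvInfer.h>
#include <iostream>
#include <memory>

void printLayerInfo(nvinfer1::ICudaEngine& engine, nvinfer1::IExecutionContext* context)
{
    std::unique_ptr<nvinfer1::IEngineInspector> inspector{engine.createEngineInspector()};
    if (context != nullptr)
    {
        inspector->setExecutionContext(context); // Lets the report reflect the current shapes.
    }
    std::cout << inspector->getEngineInformation(nvinfer1::LayerInformationFormat::kJSON) << std::endl;
}
```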
<< std::endl; + } + else + { + outputBuffer = buffer->getHostBuffer(); + } + switch (dataType) + { + case nvinfer1::DataType::kBOOL: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kINT32: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kINT8: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kFLOAT: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kHALF: + { + dumpBuffer<__half>(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kBF16: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kUINT8: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kINT64: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported"); + case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); + } +} + +void Bindings::addBinding(TensorInfo const& tensorInfo, std::string const& fileName /*= ""*/) +{ + auto const b = tensorInfo.bindingIndex; + while (mBindings.size() <= static_cast(b)) + { + mBindings.emplace_back(); + mDevicePointers.emplace_back(); + } + mNames[tensorInfo.name] = b; + mBindings[b].isInput = tensorInfo.isInput; + mBindings[b].volume = tensorInfo.vol; + mBindings[b].dataType = tensorInfo.dataType; + if (tensorInfo.isDynamic) + { + ASSERT(!tensorInfo.isInput); // Only output shape can be possibly unknown because of DDS. + if (mBindings[b].outputAllocator == nullptr) + { + if (mUseManaged) + { + mBindings[b].outputAllocator.reset(new OutputAllocator(new UnifiedMirroredBuffer)); + } + else + { + mBindings[b].outputAllocator.reset(new OutputAllocator(new DiscreteMirroredBuffer)); + } + } + } + else + { + if (mBindings[b].buffer == nullptr) + { + if (mUseManaged) + { + mBindings[b].buffer.reset(new UnifiedMirroredBuffer); + } + else + { + mBindings[b].buffer.reset(new DiscreteMirroredBuffer); + } + } + // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr + // even for empty tensors, so allocate a dummy byte. 
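Outputs whose final size is only known after execution (data-dependent shapes) are given an OutputAllocator above instead of a pre-sized buffer, and dump() later reads the final dimensions back from it. The shape of that interface, as a rough sketch with a naive cudaMalloc strategy; real code would reuse and align buffers, and newer TensorRT releases also offer reallocateOutputAsync:

```cpp
// Sketch: minimal IOutputAllocator for an output tensor with data-dependent shape.
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <cstdint>

class SimpleOutputAllocator : public nvinfer1::IOutputAllocator
{
public:
    void* reallocateOutput(char const* /*tensorName*/, void* /*currentMemory*/, uint64_t size,
        uint64_t /*alignment*/) noexcept override
    {
        // Called once TensorRT knows the real output size; grow the buffer if needed.
        if (size > mSize)
        {
            cudaFree(mBuffer);
            mBuffer = nullptr;
            if (cudaMalloc(&mBuffer, size) != cudaSuccess)
            {
                return nullptr;
            }
            mSize = size;
        }
        return mBuffer;
    }

    void notifyShape(char const* /*tensorName*/, nvinfer1::Dims const& dims) noexcept override
    {
        mFinalDims = dims; // The shape to use when copying the result back to the host.
    }

    nvinfer1::Dims mFinalDims{};
    void* mBuffer{nullptr};
    uint64_t mSize{0};
};

// Usage: context.setOutputAllocator("output", &allocator); before enqueueV3 ("output" is a placeholder name).
```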
+ if (tensorInfo.vol == 0) + { + mBindings[b].buffer->allocate(1); + } + else + { + mBindings[b].buffer->allocate( + static_cast(tensorInfo.vol) * static_cast(dataTypeSize(tensorInfo.dataType))); + } + mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer(); + } + if (tensorInfo.isInput) + { + if (fileName.empty()) + { + fill(b); + } + else + { + fill(b, fileName); + } + } +} + +void** Bindings::getDeviceBuffers() +{ + return mDevicePointers.data(); +} + +void Bindings::transferInputToDevice(TrtCudaStream& stream) +{ + for (auto& b : mNames) + { + if (mBindings[b.second].isInput) + { + mBindings[b.second].buffer->hostToDevice(stream); + } + } +} + +void Bindings::transferOutputToHost(TrtCudaStream& stream) +{ + for (auto& b : mNames) + { + if (!mBindings[b.second].isInput) + { + if (mBindings[b.second].outputAllocator != nullptr) + { + mBindings[b.second].outputAllocator->getBuffer()->deviceToHost(stream); + } + else + { + mBindings[b.second].buffer->deviceToHost(stream); + } + } + } +} + +void Bindings::dumpBindingValues(nvinfer1::IExecutionContext const& context, int32_t binding, std::ostream& os, + std::string const& separator /*= " "*/, int32_t batch /*= 1*/) const +{ + auto const tensorName = context.getEngine().getIOTensorName(binding); + Dims dims = context.getTensorShape(tensorName); + Dims strides = context.getTensorStrides(tensorName); + int32_t vectorDim = context.getEngine().getTensorVectorizedDim(tensorName); + int32_t const spv = context.getEngine().getTensorComponentsPerElement(tensorName); + + mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator); +} + +namespace +{ + +std::string genFilenameSafeString(std::string const& s) +{ + std::string res = s; + static std::string const allowedSpecialChars{"._-,"}; + for (auto& c : res) + { + if (!isalnum(c) && allowedSpecialChars.find(c) == std::string::npos) + { + c = '_'; + } + } + return res; +} + +Dims getBindingDimensions(nvinfer1::IExecutionContext const& context, std::string const& name) +{ + return context.getTensorShape(name.c_str()); +} +} // namespace + +void Bindings::dumpRawBindingToFiles(nvinfer1::IExecutionContext const& context, std::ostream& os) const +{ + os << "Dumping I/O Bindings to RAW Files:" << std::endl; + for (auto const& n : mNames) + { + auto name = n.first; + auto bIndex = n.second; + auto const& binding = mBindings[bIndex]; + void* outputBuffer{}; + if (binding.outputAllocator != nullptr) + { + outputBuffer = binding.outputAllocator->getBuffer()->getHostBuffer(); + } + else + { + outputBuffer = binding.buffer->getHostBuffer(); + } + + Dims dims = getBindingDimensions(context, name); + std::string dimsStr; + std::string dotStr; + + for (int32_t i = 0; i < dims.nbDims; i++) + { + dimsStr += dotStr + std::to_string(dims.d[i]); + dotStr = "."; + } + + std::string const bindingTypeStr = (binding.isInput ? "input" : "output"); + + std::stringstream fileName; + fileName << genFilenameSafeString(name) << "." << bindingTypeStr << "." << dimsStr << "." 
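setTensorAddresses in this hunk walks the I/O tensors by name and hands TensorRT either a device pointer or an output allocator; this name-based loop is the TensorRT 10 replacement for the bindings array that enqueueV2 used to take. Condensed into a sketch, assuming every tensor already has a device buffer in a map:

```cpp
// Sketch: bind every I/O tensor by name, then launch with enqueueV3.
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <string>
#include <unordered_map>

bool bindAndRun(nvinfer1::ICudaEngine const& engine, nvinfer1::IExecutionContext& context,
    std::unordered_map<std::string, void*> const& deviceBuffers, cudaStream_t stream)
{
    for (int32_t i = 0; i < engine.getNbIOTensors(); ++i)
    {
        char const* name = engine.getIOTensorName(i);
        if (!context.setTensorAddress(name, deviceBuffers.at(name)))
        {
            return false;
        }
    }
    // All addresses (and any dynamic input shapes) must be set before enqueueV3.
    return context.enqueueV3(stream);
}
```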
<< binding.dataType + << ".raw"; + + os << "Writing file for " << bindingTypeStr << " binding " << name << " (with datatype " << binding.dataType + << " and dimensions " << dimsStr << ") to " << fileName.str() << std::endl; + + std::ofstream f(fileName.str(), std::ios::out | std::ios::binary); + ASSERT(f && "Cannot open file for write"); + f.write(static_cast(outputBuffer), binding.volume * samplesCommon::elementSize(binding.dataType)); + f.close(); + } +} + +void Bindings::dumpBindingDimensions( + std::string const& name, nvinfer1::IExecutionContext const& context, std::ostream& os) const +{ + auto const dims = context.getTensorShape(name.c_str()); + // Do not add a newline terminator, because the caller may be outputting a JSON string. + os << dims; +} + +std::unordered_map Bindings::getBindings(std::function predicate) const +{ + std::unordered_map bindings; + for (auto const& n : mNames) + { + auto const binding = n.second; + if (predicate(mBindings[binding])) + { + bindings.insert(n); + } + } + return bindings; +} + +bool Bindings::setTensorAddresses(nvinfer1::IExecutionContext& context) const +{ + for (auto const& b : mNames) + { + auto const name = b.first.c_str(); + auto const location = context.getEngine().getTensorLocation(name); + if (location == TensorLocation::kDEVICE) + { + if (mBindings[b.second].outputAllocator != nullptr) + { + if (!context.setOutputAllocator(name, mBindings[b.second].outputAllocator.get())) + { + return false; + } + } + else + { + if (!context.setTensorAddress(name, mDevicePointers[b.second])) + { + return false; + } + } + } + } + return true; +} + +bool DebugTensorWriter::processDebugTensor(void const* addr, nvinfer1::TensorLocation location, nvinfer1::DataType type, + nvinfer1::Dims const& shape, char const* name, cudaStream_t stream) +{ + CHECK(cudaStreamSynchronize(stream)); + // Store data from callback. + int64_t size = std::accumulate(shape.d, shape.d + shape.nbDims, 1LL, std::multiplies{}) + * samplesCommon::elementSize(type); + std::vector hostDataOut(size, 0); + CHECK(cudaMemcpy(hostDataOut.data(), addr, size, cudaMemcpyDeviceToHost)); + + auto it = mDebugTensorFileNames.find(name); + ASSERT(it != mDebugTensorFileNames.end()); + std::string fileName = it->second; + + std::ofstream f(fileName, std::ios::out | std::ios::binary); + ASSERT(f && "Cannot open file for write"); + sample::gLogInfo << "Writing to file " << fileName << " for debug tensor " << name << std::endl; + f.write(hostDataOut.data(), size); + f.close(); + + CHECK(cudaStreamSynchronize(stream)); + return true; +} + } // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/sampleInference.h b/src/Detector/tensorrt_yolo/common/sampleInference.h index 1c21f592..d9ebed92 100644 --- a/src/Detector/tensorrt_yolo/common/sampleInference.h +++ b/src/Detector/tensorrt_yolo/common/sampleInference.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,76 +18,243 @@ #ifndef TRT_SAMPLE_INFERENCE_H #define TRT_SAMPLE_INFERENCE_H +#include "sampleDevice.h" +#include "sampleEngines.h" #include "sampleReporting.h" #include "sampleUtils.h" +#include #include +#include #include #include #include -#include "NvInfer.h" +namespace sample +{ -#if (NV_TENSORRT_MAJOR > 7) +// IDebugListener class for writing debug tensors to output file. +class DebugTensorWriter : public nvinfer1::IDebugListener +{ +public: + DebugTensorWriter(std::unordered_map fileNames) + : mDebugTensorFileNames(fileNames) + { + } -#include "NvInferSafeRuntime.h" + bool processDebugTensor(void const* addr, nvinfer1::TensorLocation location, nvinfer1::DataType type, + nvinfer1::Dims const& shape, char const* name, cudaStream_t stream) override; -namespace sample -{ +private: + std::unordered_map mDebugTensorFileNames; +}; struct InferenceEnvironment { - TrtUniquePtr engine; + InferenceEnvironment() = delete; + InferenceEnvironment(InferenceEnvironment const& other) = delete; + InferenceEnvironment(InferenceEnvironment&& other) = delete; + InferenceEnvironment(BuildEnvironment& bEnv) : engine(std::move(bEnv.engine)), safe(bEnv.engine.isSafe()) + { + } + + LazilyDeserializedEngine engine; std::unique_ptr profiler; - std::vector> context; + std::vector> contexts; + std::vector + deviceMemory; //< Device memory used for inference when the allocation strategy is not static. std::vector> bindings; + std::unique_ptr listener; bool error{false}; - std::vector engineBlob; - bool safe{false}; - std::unique_ptr safeEngine; - std::vector> safeContext; - template - inline ContextType* getContext(int32_t streamIdx); + inline nvinfer1::IExecutionContext* getContext(int32_t streamIdx); + + //! Storage for input shape tensors. + //! + //! It's important that the addresses of the data do not change between the calls to + //! setTensorAddress/setInputShape (which tells TensorRT where the input shape tensor is) + //! and enqueueV3 (when TensorRT might use the input shape tensor). + //! + //! The input shape tensors could alternatively be handled via member bindings, + //! but it simplifies control-flow to store the data here since it's shared across + //! the bindings. + std::list> inputShapeTensorValues; }; -template <> inline nvinfer1::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx) { - return context[streamIdx].get(); -} - -template <> -inline nvinfer1::safe::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx) -{ - return safeContext[streamIdx].get(); + return contexts[streamIdx].get(); } //! //! \brief Set up contexts and bindings for inference //! -bool setUpInference(InferenceEnvironment& iEnv, const InferenceOptions& inference); +bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inference, SystemOptions const& system); //! //! \brief Deserialize the engine and time how long it takes. //! -bool timeDeserialize(InferenceEnvironment& iEnv); +bool timeDeserialize(InferenceEnvironment& iEnv, SystemOptions const& sys); //! //! \brief Run inference and collect timing, return false if any error hit during inference //! 
bool runInference( - const InferenceOptions& inference, InferenceEnvironment& iEnv, int32_t device, std::vector& trace); + InferenceOptions const& inference, InferenceEnvironment& iEnv, int32_t device, std::vector& trace); //! //! \brief Get layer information of the engine. //! -std::string getLayerInformation(const InferenceEnvironment& iEnv, nvinfer1::LayerInformationFormat format); +std::string getLayerInformation( + nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context, nvinfer1::LayerInformationFormat format); -} // namespace sample +struct Binding +{ + bool isInput{false}; + std::unique_ptr buffer; + std::unique_ptr outputAllocator; + int64_t volume{0}; + nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT}; + + void fill(std::string const& fileName); + + void fill(); + + void dump(std::ostream& os, nvinfer1::Dims dims, nvinfer1::Dims strides, int32_t vectorDim, int32_t spv, + std::string const separator = " ") const; +}; + +struct TensorInfo +{ + int32_t bindingIndex{-1}; + char const* name{nullptr}; + nvinfer1::Dims dims{}; + bool isDynamic{}; + int32_t comps{-1}; + nvinfer1::Dims strides{}; + int32_t vectorDimIndex{-1}; + bool isInput{}; + nvinfer1::DataType dataType{}; + int64_t vol{-1}; + + void updateVolume(int32_t batch) + { + vol = volume(dims, strides, vectorDimIndex, comps, batch); + } +}; + +class Bindings +{ +public: + Bindings() = delete; + explicit Bindings(bool useManaged) + : mUseManaged(useManaged) + { + } + + void addBinding(TensorInfo const& tensorInfo, std::string const& fileName = ""); -#endif + void** getDeviceBuffers(); + + void transferInputToDevice(TrtCudaStream& stream); + + void transferOutputToHost(TrtCudaStream& stream); + + void fill(int binding, std::string const& fileName) + { + mBindings[binding].fill(fileName); + } + + void fill(int binding) + { + mBindings[binding].fill(); + } + + void dumpBindingDimensions( + std::string const& name, nvinfer1::IExecutionContext const& context, std::ostream& os) const; + + void dumpBindingValues(nvinfer1::IExecutionContext const& context, int32_t binding, std::ostream& os, + std::string const& separator = " ", int32_t batch = 1) const; + + void dumpRawBindingToFiles(nvinfer1::IExecutionContext const& context, std::ostream& os) const; + + void dumpInputs(nvinfer1::IExecutionContext const& context, std::ostream& os) const + { + auto isInput = [](Binding const& b) { return b.isInput; }; + dumpBindings(context, isInput, os); + } + + void dumpOutputs(nvinfer1::IExecutionContext const& context, std::ostream& os) const; + + void dumpBindings(nvinfer1::IExecutionContext const& context, std::ostream& os) const + { + auto all = [](Binding const& b) { return true; }; + dumpBindings(context, all, os); + } + + void dumpBindings(nvinfer1::IExecutionContext const& context, std::function predicate, + std::ostream& os) const + { + for (auto const& n : mNames) + { + auto const name = n.first; + auto const binding = n.second; + if (predicate(mBindings[binding])) + { + os << n.first << ": ("; + dumpBindingDimensions(name, context, os); + os << ")" << std::endl; + + dumpBindingValues(context, binding, os); + os << std::endl; + } + } + } + + std::unordered_map getInputBindings() const + { + auto isInput = [](Binding const& b) { return b.isInput; }; + return getBindings(isInput); + } + + std::unordered_map getOutputBindings() const + { + auto isOutput = [](Binding const& b) { return !b.isInput; }; + return getBindings(isOutput); + } + + std::unordered_map getBindings() const + { + auto all = [](Binding const& b) { return 
true; }; + return getBindings(all); + } + + std::unordered_map getBindings(std::function predicate) const; + + bool setTensorAddresses(nvinfer1::IExecutionContext& context) const; + +private: + std::unordered_map mNames; + std::vector mBindings; + std::vector mDevicePointers; + bool mUseManaged{false}; +}; + +struct TaskInferenceEnvironment +{ + TaskInferenceEnvironment(std::string engineFile, InferenceOptions inference, int32_t deviceId = 0, + int32_t DLACore = -1, int32_t bs = batchNotProvided); + InferenceOptions iOptions{}; + int32_t device{defaultDevice}; + int32_t batch{batchNotProvided}; + std::unique_ptr iEnv; + std::vector trace; +}; + +bool runMultiTasksInference(std::vector>& tEnvList); + +} // namespace sample #endif // TRT_SAMPLE_INFERENCE_H diff --git a/src/Detector/tensorrt_yolo/common/sampleOptions.cpp b/src/Detector/tensorrt_yolo/common/sampleOptions.cpp index 0afd163f..bdb1b21c 100644 --- a/src/Detector/tensorrt_yolo/common/sampleOptions.cpp +++ b/src/Detector/tensorrt_yolo/common/sampleOptions.cpp @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -19,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -27,29 +29,64 @@ #include "logger.h" #include "sampleOptions.h" - +#include "sampleUtils.h" +using namespace nvinfer1; namespace sample { namespace { -std::vector splitToStringVec(const std::string& option, char separator) +static const std::map> kUNIT_MULTIPLIERS{ + {'B', {1, "Bytes"}}, + {'K', {1 << 10, "Kibibytes"}}, + {'M', {1 << 20, "Mebibytes"}}, + {'G', {1 << 30, "Gibibytes"}}, +}; + +std::string addDefaultUnitSuffixIfNotSpecified(std::string const& option, char defaultUnit) { - std::vector options; + char lastChar = option.at(option.size() - 1); + return std::isdigit(lastChar) ? option + defaultUnit : option; +} - for (size_t start = 0; start < option.length();) +// Returns "B (Bytes), K (Kilobytes), ..." +std::string getAvailableUnitSuffixes() +{ + std::ostringstream ss; + for (auto it = kUNIT_MULTIPLIERS.begin(); it != kUNIT_MULTIPLIERS.end(); ++it) { - size_t separatorIndex = option.find(separator, start); - if (separatorIndex == std::string::npos) + if (it != kUNIT_MULTIPLIERS.begin()) { - separatorIndex = option.length(); + ss << ", "; } - options.emplace_back(option.substr(start, separatorIndex - start)); - start = separatorIndex + 1; + ss << it->first << " (" << it->second.second << ")"; } + return ss.str(); +} - return options; +// Numeric trtexec arguments can have unit specifiers in similar to polygraphy. +// E.g. --weightStreamingBudget=20M would be 20 Mebibytes (base 2). 
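Stepping back to the sampleInference.h declarations above: TaskInferenceEnvironment bundles its own options, device id and trace so that runMultiTasksInference can drive several engines side by side. A usage sketch built only from those declarations; the engine file names are placeholders and the default-constructed InferenceOptions would normally be tuned first:

```cpp
// Sketch: running two engines side by side with the multi-task API declared above.
#include <memory>
#include <vector>

#include "sampleInference.h"
#include "sampleOptions.h"

bool runTwoEngines()
{
    using namespace sample;
    InferenceOptions opts{}; // Default options; streams, duration, etc. can be adjusted here.

    std::vector<std::unique_ptr<TaskInferenceEnvironment>> tasks;
    tasks.emplace_back(new TaskInferenceEnvironment("detector.engine", opts, /*deviceId*/ 0));
    tasks.emplace_back(new TaskInferenceEnvironment("classifier.engine", opts, /*deviceId*/ 0));

    // Each task gets its own thread; the call returns false if any task hit an error.
    return runMultiTasksInference(tasks);
}
```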
+int64_t getUnitMultiplier(std::string const& option) +{ + char lastChar = option.at(option.size() - 1); + if (!std::isdigit(lastChar)) + { + char unit = std::toupper(lastChar); + auto found = kUNIT_MULTIPLIERS.find(unit); + if (found == kUNIT_MULTIPLIERS.end()) + { + std::ostringstream ss; + ss << "Error parsing \"" << option << "\": invalid unit specifier '" << unit + << "'. Valid base-2 unit suffixes include: "; + ss << getAvailableUnitSuffixes() << "."; + throw std::invalid_argument(ss.str()); + } + return found->second.first; + } + + // Return bytes by default + return kUNIT_MULTIPLIERS.at('B').first; } template @@ -64,6 +101,12 @@ int32_t stringToValue(const std::string& option) return std::stoi(option); } +template <> +size_t stringToValue(const std::string& option) +{ + return std::stoi(option) * getUnitMultiplier(option); +} + template <> float stringToValue(const std::string& option) { @@ -73,7 +116,7 @@ float stringToValue(const std::string& option) template <> double stringToValue(const std::string& option) { - return std::stod(option); + return std::stod(option) * getUnitMultiplier(option); } template <> @@ -86,6 +129,10 @@ template <> std::vector stringToValue>(const std::string& option) { std::vector shape; + if (option == "scalar") + { + return shape; + } std::vector dimsStrings = splitToStringVec(option, 'x'); for (const auto& d : dimsStrings) { @@ -98,8 +145,9 @@ template <> nvinfer1::DataType stringToValue(const std::string& option) { const std::unordered_map strToDT{{"fp32", nvinfer1::DataType::kFLOAT}, - {"fp16", nvinfer1::DataType::kHALF}, {"int8", nvinfer1::DataType::kINT8}, - {"int32", nvinfer1::DataType::kINT32}}; + {"fp16", nvinfer1::DataType::kHALF}, {"bf16", nvinfer1::DataType::kBF16}, {"int8", nvinfer1::DataType::kINT8}, + {"fp8", nvinfer1::DataType::kFP8}, {"int32", nvinfer1::DataType::kINT32}, {"int64", nvinfer1::DataType::kINT64}, + {"bool", nvinfer1::DataType::kBOOL}, {"uint8", nvinfer1::DataType::kUINT8}, {"int4", nvinfer1::DataType::kINT4}}; const auto& dt = strToDT.find(option); if (dt == strToDT.end()) { @@ -108,6 +156,21 @@ nvinfer1::DataType stringToValue(const std::string& option) return dt->second; } +template <> +nvinfer1::DeviceType stringToValue(std::string const& option) +{ + std::unordered_map const strToDevice = { + {"GPU", nvinfer1::DeviceType::kGPU}, + {"DLA", nvinfer1::DeviceType::kDLA}, + }; + auto const& device = strToDevice.find(option); + if (device == strToDevice.end()) + { + throw std::invalid_argument("Invalid Device Type " + option); + } + return device->second; +} + template <> nvinfer1::TensorFormats stringToValue(const std::string& option) { @@ -116,7 +179,8 @@ nvinfer1::TensorFormats stringToValue(const std::string {"chw2", nvinfer1::TensorFormat::kCHW2}, {"chw4", nvinfer1::TensorFormat::kCHW4}, {"hwc8", nvinfer1::TensorFormat::kHWC8}, {"chw16", nvinfer1::TensorFormat::kCHW16}, {"chw32", nvinfer1::TensorFormat::kCHW32}, {"dhwc8", nvinfer1::TensorFormat::kDHWC8}, - {"hwc", nvinfer1::TensorFormat::kHWC}, {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR}, + {"cdhw32", nvinfer1::TensorFormat::kCDHW32}, {"hwc", nvinfer1::TensorFormat::kHWC}, + {"dhwc", nvinfer1::TensorFormat::kDHWC}, {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR}, {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}}; nvinfer1::TensorFormats formats{}; for (auto f : optionStrings) @@ -149,11 +213,82 @@ IOFormat stringToValue(const std::string& option) return ioFormat; } +template <> +SparsityFlag stringToValue(std::string const& option) +{ + std::unordered_map 
const table{ + {"disable", SparsityFlag::kDISABLE}, {"enable", SparsityFlag::kENABLE}, {"force", SparsityFlag::kFORCE}}; + auto search = table.find(option); + if (search == table.end()) + { + throw std::invalid_argument(std::string("Unknown sparsity mode: ") + option); + } + if (search->second == SparsityFlag::kFORCE) + { + sample::gLogWarning << "--sparsity=force has been deprecated. " + << "Please use to rewrite the weights to a sparsity pattern " + << "and then run with --sparsity=enable" << std::endl; + } + + return search->second; +} + +template <> +WeightStreamingBudget stringToValue(std::string const& option) +{ + WeightStreamingBudget budget; + if (option.find('%') != std::string::npos) + { + double percent = std::stod(option); + if (!(percent >= 0 && percent <= 100.0)) + { + std::ostringstream err; + err << "The weight streaming percent must be between 0 and 100."; + throw std::invalid_argument(err.str()); + } + budget.percent = percent; + } + else + { + double bytes = stringToValue(option); + if (!(bytes == WeightStreamingBudget::kAUTOMATIC || bytes == WeightStreamingBudget::kDISABLE || bytes >= 0)) + { + std::ostringstream err; + err << "The weight streaming budget must be " << WeightStreamingBudget::kDISABLE << ", " + << WeightStreamingBudget::kAUTOMATIC << ", or at least 0."; + throw std::invalid_argument(err.str()); + } + budget.bytes = static_cast(bytes); + } + return budget; +} + template std::pair splitNameAndValue(const std::string& s) { std::string tensorName; std::string valueString; + + // Support 'inputName':Path format for --loadInputs flag when dealing with Windows paths. + // i.e. 'inputName':c:\inputData + std::vector quoteNameRange{ splitToStringVec(s, '\'') }; + // splitToStringVec returns the entire string when delimiter is not found, so it's size is always at least 1 + if (quoteNameRange.size() != 1) + { + if (quoteNameRange.size() != 3) + { + std::string errorMsg = std::string("Found invalid number of \'s when parsing ") + s + + std::string(". Expected: 2, received: ") + std::to_string(quoteNameRange.size() -1) + + ". Please ensure that a singular comma is used within each comma-separated key-value pair for options like --inputIOFormats, --optShapes, --optShapesCalib, --layerPrecisions, etc."; + throw std::invalid_argument(errorMsg); + } + // Everything before the second "'" is the name. + tensorName = quoteNameRange[0] + quoteNameRange[1]; + // Path is the last string - ignoring leading ":" so slice it with [1:] + valueString = quoteNameRange[2].substr(1); + return std::pair(tensorName, stringToValue(valueString)); + } + // Split on the last : std::vector nameRange{splitToStringVec(s, ':')}; // Everything before the last : is the name @@ -181,16 +316,71 @@ const char* boolToEnabled(bool enable) return enable ? "Enabled" : "Disabled"; } +//! A helper function similar to sep.join(list) in Python. +template +std::string joinValuesToString(std::vector const& list, std::string const& sep) +{ + std::ostringstream os; + for (int32_t i = 0, n = list.size(); i < n; ++i) + { + os << list[i]; + if (i != n - 1) + { + os << sep; + } + } + return os.str(); +} + +template +std::string joinValuesToString(std::array const& list, std::string const& sep) +{ + return joinValuesToString(std::vector(list.begin(), list.end()), sep); +} + //! Check if input option exists in input arguments. -//! If it does: return its value, erase the argument and return true. +//! If it does: set its value, and return true //! If it does not: return false. 
template -bool getAndDelOption(Arguments& arguments, const std::string& option, T& value) +bool getOption(Arguments& arguments, const std::string& option, T& value) { - const auto match = arguments.find(option); + auto const match = arguments.find(option); if (match != arguments.end()) { - value = stringToValue(match->second); + value = stringToValue(match->second.first); + return true; + } + + return false; +} + +//! Check if input option exists in input arguments. +//! If it does: set its value, erase the argument and return true. +//! If it does not: return false. +template +bool getAndDelOption(Arguments& arguments, const std::string& option, T_& value) +{ + bool found = getOption(arguments, option, value); + if (found) + { + const auto match = arguments.find(option); + arguments.erase(match); + } + + return found; +} + +//! Check if input option exists in input arguments. +//! If it does: set its value and position, erase the argument and return true. +//! If it does not: return false. +template +bool getAndDelOptionWithPosition(Arguments& arguments, std::string const& option, T_& value, int32_t& pos) +{ + auto const match = arguments.find(option); + if (match != arguments.end()) + { + value = stringToValue(match->second.first); + pos = match->second.second; arguments.erase(match); return true; } @@ -198,8 +388,31 @@ bool getAndDelOption(Arguments& arguments, const std::string& option, T& value) return false; } +//! Check if input option exists in input arguments behind the position spcecified by pos. +//! If it does: set its value, erase the argument and return true. +//! If it does not: return false. +template +bool getAndDelOptionBehind(Arguments& arguments, std::string const& option, int32_t pos, T_& value) +{ + auto const match = arguments.equal_range(option); + if (match.first == match.second) + { + return false; + } + for (auto i = match.first; i != match.second; ++i) + { + if (i->second.second - pos == 1) + { + value = stringToValue(i->second.first); + arguments.erase(i); + return true; + } + } + return false; +} + //! Check if input option exists in input arguments. -//! If it does: return false in value, erase the argument and return true. +//! If it does: set false in value, erase the argument and return true. //! If it does not: return false. 
bool getAndDelNegOption(Arguments& arguments, const std::string& option, bool& value) { @@ -224,34 +437,37 @@ bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option, st return false; } - auto addToValues = [&values](Arguments::value_type& argValue) {values.emplace_back(stringToValue(argValue.second));}; + auto addToValues + = [&values](Arguments::value_type& argValue) { values.emplace_back(stringToValue(argValue.second.first)); }; std::for_each(match.first, match.second, addToValues); arguments.erase(match.first, match.second); return true; } -void insertShapesBuild(std::unordered_map& shapes, nvinfer1::OptProfileSelector selector, const std::string& name, const std::vector& dims) +void insertShapesBuild(BuildOptions::ShapeProfile& shapes, nvinfer1::OptProfileSelector selector, + const std::string& name, const std::vector& dims) { shapes[name][static_cast(selector)] = dims; } -void insertShapesInference(std::unordered_map>& shapes, const std::string& name, const std::vector& dims) +void insertShapesInference( + InferenceOptions::ShapeProfile& shapes, std::string const& name, std::vector const& dims) { shapes[name] = dims; } std::string removeSingleQuotationMarks(std::string& str) { - std::vector strList{splitToStringVec(str, '\'')}; - // Remove all the escaped single quotation marks - std::string retVal = ""; - // Do not really care about unterminated sequences - for (size_t i = 0; i < strList.size(); i++) - { - retVal += strList[i]; - } - return retVal; + std::vector strList{splitToStringVec(str, '\'')}; + // Remove all the escaped single quotation marks + std::string retVal; + // Do not really care about unterminated sequences + for (size_t i = 0; i < strList.size(); i++) + { + retVal += strList[i]; + } + return retVal; } void getLayerPrecisions(Arguments& arguments, char const* argument, LayerPrecisions& layerPrecisions) @@ -293,7 +509,41 @@ void getLayerOutputTypes(Arguments& arguments, char const* argument, LayerOutput } } -bool getShapesBuild(Arguments& arguments, std::unordered_map& shapes, char const* argument, +void getLayerDeviceTypes(Arguments& arguments, char const* argument, LayerDeviceTypes& layerDeviceTypes) +{ + std::string list; + if (!getAndDelOption(arguments, argument, list)) + { + return; + } + + // The layerDeviceTypes flag contains comma-separated layerName:deviceType pairs. + std::vector deviceList{splitToStringVec(list, ',')}; + for (auto const& s : deviceList) + { + auto nameDevicePair = splitNameAndValue(s); + auto const layerName = removeSingleQuotationMarks(nameDevicePair.first); + layerDeviceTypes[layerName] = stringToValue(nameDevicePair.second); + } +} + +void getStringsSet(Arguments& arguments, char const* argument, StringSet& stringSet) +{ + std::string list; + if (!getAndDelOption(arguments, argument, list)) + { + return; + } + + // The layerPrecisions flag contains comma-separated layerName:precision pairs. 
+ std::vector strings{splitToStringVec(list, ',')}; + for (auto const& s : strings) + { + stringSet.insert(s); + } +} + +bool getShapesBuild(Arguments& arguments, BuildOptions::ShapeProfile& shapes, char const* argument, nvinfer1::OptProfileSelector selector) { std::string list; @@ -309,7 +559,7 @@ bool getShapesBuild(Arguments& arguments, std::unordered_map>& shapes, const char* argument) +bool getShapesInference(Arguments& arguments, InferenceOptions::ShapeProfile& shapes, const char* argument) { std::string list; bool retVal = getAndDelOption(arguments, argument, list); @@ -324,67 +574,195 @@ bool getShapesInference(Arguments& arguments, std::unordered_map& shapes, bool minShapes, bool optShapes, bool maxShapes, bool calib) +void fillShapes(BuildOptions::ShapeProfile& shapes, std::string const& name, ShapeRange const& sourceShapeRange, + nvinfer1::OptProfileSelector minDimsSource, nvinfer1::OptProfileSelector optDimsSource, + nvinfer1::OptProfileSelector maxDimsSource) { - // Only accept optShapes only or all three of minShapes, optShapes, maxShapes - if ( ((minShapes || maxShapes) && !optShapes) // minShapes only, maxShapes only, both minShapes and maxShapes + insertShapesBuild( + shapes, nvinfer1::OptProfileSelector::kMIN, name, sourceShapeRange[static_cast(minDimsSource)]); + insertShapesBuild( + shapes, nvinfer1::OptProfileSelector::kOPT, name, sourceShapeRange[static_cast(optDimsSource)]); + insertShapesBuild( + shapes, nvinfer1::OptProfileSelector::kMAX, name, sourceShapeRange[static_cast(maxDimsSource)]); +} + +void processShapes(BuildOptions::ShapeProfile& shapes, bool minShapes, bool optShapes, bool maxShapes, bool calib) +{ + // Only accept optShapes only or all three of minShapes, optShapes, maxShapes when calib is set + if (((minShapes || maxShapes) && !optShapes) // minShapes only, maxShapes only, both minShapes and maxShapes || (minShapes && !maxShapes && optShapes) // both minShapes and optShapes || (!minShapes && maxShapes && optShapes)) // both maxShapes and optShapes { if (calib) { - throw std::invalid_argument("Must specify only --optShapesCalib or all of --minShapesCalib, --optShapesCalib, --maxShapesCalib"); - } - else - { - throw std::invalid_argument("Must specify only --optShapes or all of --minShapes, --optShapes, --maxShapes"); + throw std::invalid_argument( + "Must specify only --optShapesCalib or all of --minShapesCalib, --optShapesCalib, --maxShapesCalib"); } } - // If optShapes only, expand optShapes to minShapes and maxShapes - if (optShapes && !minShapes && !maxShapes) + if (!minShapes && !optShapes && !maxShapes) { - std::unordered_map newShapes; - for (auto& s : shapes) + return; + } + + BuildOptions::ShapeProfile newShapes; + for (auto& s : shapes) + { + nvinfer1::OptProfileSelector minDimsSource, optDimsSource, maxDimsSource; + minDimsSource = nvinfer1::OptProfileSelector::kMIN; + optDimsSource = nvinfer1::OptProfileSelector::kOPT; + maxDimsSource = nvinfer1::OptProfileSelector::kMAX; + + // Populate missing minShapes + if (!minShapes) + { + if (optShapes) + { + minDimsSource = optDimsSource; + sample::gLogWarning << "optShapes is being broadcasted to minShapes for tensor " << s.first + << std::endl; + } + else + { + minDimsSource = maxDimsSource; + sample::gLogWarning << "maxShapes is being broadcasted to minShapes for tensor " << s.first + << std::endl; + } + } + + // Populate missing optShapes + if (!optShapes) { - insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kMIN, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - 
insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kOPT, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kMAX, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); + if (maxShapes) + { + optDimsSource = maxDimsSource; + sample::gLogWarning << "maxShapes is being broadcasted to optShapes for tensor " << s.first + << std::endl; + } + else + { + optDimsSource = minDimsSource; + sample::gLogWarning << "minShapes is being broadcasted to optShapes for tensor " << s.first + << std::endl; + } + } + + // Populate missing maxShapes + if (!maxShapes) + { + if (optShapes) + { + maxDimsSource = optDimsSource; + sample::gLogWarning << "optShapes is being broadcasted to maxShapes for tensor " << s.first + << std::endl; + } + else + { + maxDimsSource = minDimsSource; + sample::gLogWarning << "minShapes is being broadcasted to maxShapes for tensor " << s.first + << std::endl; + } } - shapes = newShapes; + + fillShapes(newShapes, s.first, s.second, minDimsSource, optDimsSource, maxDimsSource); } + shapes = newShapes; } -template -void printShapes(std::ostream& os, const char* phase, const T& shapes) +bool getOptimizationProfiles( + Arguments& arguments, std::vector& optProfiles, char const* argument) { - if (shapes.empty()) + bool retValue{false}; + int32_t pos{}; + size_t profileIndex{}; + + auto getShapes + = [](BuildOptions::ShapeProfile& shapes, std::string const& list, nvinfer1::OptProfileSelector selector) { + std::vector shapeList{splitToStringVec(list, ',')}; + for (auto const& s : shapeList) + { + auto nameDimsPair = splitNameAndValue>(s); + auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); + auto dims = nameDimsPair.second; + insertShapesBuild(shapes, selector, tensorName, dims); + } + }; + + while (getAndDelOptionWithPosition(arguments, argument, profileIndex, pos)) { - os << "Input " << phase << " shapes: model" << std::endl; + BuildOptions::ShapeProfile optProfile{}; + bool minShapes{false}, maxShapes{false}, optShapes{false}; + for (int32_t i = 0; i < nvinfer1::EnumMax(); i++, pos++) + { + std::string value; + + if (!minShapes && getAndDelOptionBehind(arguments, "--minShapes", pos, value)) + { + minShapes = true; + getShapes(optProfile, value, nvinfer1::OptProfileSelector::kMIN); + } + else if (!maxShapes && getAndDelOptionBehind(arguments, "--maxShapes", pos, value)) + { + maxShapes = true; + getShapes(optProfile, value, nvinfer1::OptProfileSelector::kMAX); + } + else if (!optShapes && getAndDelOptionBehind(arguments, "--optShapes", pos, value)) + { + optShapes = true; + getShapes(optProfile, value, nvinfer1::OptProfileSelector::kOPT); + } + else + { + break; + } + } + processShapes(optProfile, minShapes, optShapes, maxShapes, false); + if (profileIndex >= optProfiles.size()) + { + optProfiles.resize(profileIndex + 1); + } + if (!optProfiles[profileIndex].empty()) + { + throw std::invalid_argument("Optimization profile index cannot be the same."); + } + optProfiles[profileIndex] = optProfile; + retValue = true; } - else + + profileIndex = 0; + for (auto const& optProfile : optProfiles) { - for (const auto& s : shapes) + if (optProfile.empty()) { - os << "Input " << phase << " shape: " << s.first << "=" << s.second << std::endl; + throw std::invalid_argument(std::string("Found invalid or missing shape spec at profile index ") + + std::to_string(profileIndex) + std::string(". 
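getOptimizationProfiles above lets the command line describe several profiles, each with min/opt/max shapes that processShapes broadcasts when only some of them are given. At build time each triple lands on an IOptimizationProfile; a sketch of that destination API, with "input" and the dimensions as placeholders:

```cpp
// Sketch: how per-tensor min/opt/max shapes are registered with the builder.
#include <NvInfer.h>

void addProfile(nvinfer1::IBuilder& builder, nvinfer1::IBuilderConfig& config)
{
    using nvinfer1::OptProfileSelector;
    nvinfer1::IOptimizationProfile* profile = builder.createOptimizationProfile();

    // "input" and these dimensions stand in for a real dynamic input tensor.
    profile->setDimensions("input", OptProfileSelector::kMIN, nvinfer1::Dims4{1, 3, 224, 224});
    profile->setDimensions("input", OptProfileSelector::kOPT, nvinfer1::Dims4{8, 3, 224, 224});
    profile->setDimensions("input", OptProfileSelector::kMAX, nvinfer1::Dims4{16, 3, 224, 224});

    // Repeat with further profiles to mirror multiple profile specs on the command line.
    config.addOptimizationProfile(profile);
}
```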
")); } + ++profileIndex; } + return retValue; } -std::ostream& printBatch(std::ostream& os, int32_t maxBatch) +template +void printShapes(std::ostream& os, char const* phase, T const& shapes, int32_t profileIndex) { - if (maxBatch != maxBatchNotProvided) + if (shapes.empty()) { - os << maxBatch; + os << "Input " << phase << " shapes: model" << std::endl; } else { - os << "explicit batch"; + std::string profileString = (profileIndex != -1 && strcmp(phase, "build") == 0) + ? "(profile " + std::to_string(profileIndex) + ")" + : ""; + for (auto const& s : shapes) + { + os << "Input " << phase << " shape " << profileString << ": " << s.first << "=" << s.second << std::endl; + } } - return os; } -std::ostream& printTacticSources(std::ostream& os, nvinfer1::TacticSources enabledSources, nvinfer1::TacticSources disabledSources) +std::ostream& printTacticSources( + std::ostream& os, nvinfer1::TacticSources enabledSources, nvinfer1::TacticSources disabledSources) { if (!enabledSources && !disabledSources) { @@ -405,24 +783,41 @@ std::ostream& printTacticSources(std::ostream& os, nvinfer1::TacticSources enabl addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS), "cublas"); addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS_LT), "cublasLt"); -#if (NV_TENSORRT_MAJOR > 7) addSource(1U << static_cast(nvinfer1::TacticSource::kCUDNN), "cudnn"); -#endif + addSource(1U << static_cast(nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS), "edge mask convolutions"); + addSource(1U << static_cast(nvinfer1::TacticSource::kJIT_CONVOLUTIONS), "JIT convolutions"); } return os; } std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) { + if (options.stronglyTyped) + { + os << "Strongly Typed"; + return os; + } os << "FP32"; if (options.fp16) { os << "+FP16"; } + if (options.bf16) + { + os << "+BF16"; + } if (options.int8) { os << "+INT8"; } + if (options.fp8) + { + os << "+FP8"; + } + if (options.int4) + { + os << "+INT4"; + } if (options.precisionConstraints == PrecisionConstraints::kOBEY) { os << " (obey precision constraints)"; @@ -434,13 +829,27 @@ std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) return os; } -std::ostream& printTimingCache(std::ostream& os, BuildOptions const& options) +std::ostream& printTempfileControls(std::ostream& os, TempfileControlFlags const tempfileControls) +{ + auto getFlag = [&](TempfileControlFlag f) -> char const* { + bool allowed = !!(tempfileControls & (1U << static_cast(f))); + return allowed ? 
"allow" : "deny"; + }; + auto const inMemory = getFlag(TempfileControlFlag::kALLOW_IN_MEMORY_FILES); + auto const temporary = getFlag(TempfileControlFlag::kALLOW_TEMPORARY_FILES); + + os << "{ in_memory: " << inMemory << ", temporary: " << temporary << " }"; + + return os; +} + +std::ostream& printTimingCache(std::ostream& os, TimingCacheMode const& timingCacheMode) { - switch (options.timingCacheMode) + switch (timingCacheMode) { - case TimingCacheMode::kGLOBAL: os << "global"; break; - case TimingCacheMode::kLOCAL: os << "local"; break; - case TimingCacheMode::kDISABLE: os << "disable"; break; + case TimingCacheMode::kGLOBAL: os << "global"; break; + case TimingCacheMode::kLOCAL: os << "local"; break; + case TimingCacheMode::kDISABLE: os << "disable"; break; } return os; } @@ -459,20 +868,67 @@ std::ostream& printSparsity(std::ostream& os, BuildOptions const& options) std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) { - auto const printValueOrDefault = [&os](double const val) { + auto const printValueOrDefault = [&os](double const val, char const* unit = "MiB") { if (val >= 0) { - os << val << " MiB"; + os << val << " " << unit; } else { os << "default"; } }; - os << "workspace: "; printValueOrDefault(options.workspace); os << ", "; - os << "dlaSRAM: "; printValueOrDefault(options.dlaSRAM); os << ", "; - os << "dlaLocalDRAM: "; printValueOrDefault(options.dlaLocalDRAM); os << ", "; - os << "dlaGlobalDRAM: "; printValueOrDefault(options.dlaGlobalDRAM); + os << "workspace: "; + printValueOrDefault(options.workspace); + os << ", "; + os << "dlaSRAM: "; + printValueOrDefault(options.dlaSRAM); + os << ", "; + os << "dlaLocalDRAM: "; + printValueOrDefault(options.dlaLocalDRAM); + os << ", "; + os << "dlaGlobalDRAM: "; + printValueOrDefault(options.dlaGlobalDRAM); + os << ", "; + os << "tacticSharedMem: "; + printValueOrDefault(options.tacticSharedMem, "KiB"); + return os; +} + +std::string previewFeatureToString(PreviewFeature feature) +{ + // clang-format off + switch (feature) + { + case PreviewFeature::kPROFILE_SHARING_0806: + { + gLogWarning << "profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect." << std::endl; + break; + } + case PreviewFeature::kALIASED_PLUGIN_IO_10_03: return "kALIASED_PLUGIN_IO_10_03"; + } + return "Invalid Preview Feature"; + // clang-format on +} + +std::ostream& printPreviewFlags(std::ostream& os, BuildOptions const& options) +{ + if (options.previewFeatures.empty()) + { + os << "Use default preview flags."; + return os; + } + + auto const addFlag = [&](PreviewFeature feat) { + int32_t featVal = static_cast(feat); + if (options.previewFeatures.find(featVal) != options.previewFeatures.end()) + { + os << previewFeatureToString(feat) << (options.previewFeatures.at(featVal) ? 
" [ON], " : " [OFF], "); + } + }; + + addFlag(PreviewFeature::kALIASED_PLUGIN_IO_10_03); + return os; } @@ -487,51 +943,41 @@ Arguments argsToArgumentsMap(int32_t argc, char* argv[]) if (valuePtr) { std::string value{valuePtr + 1}; - arguments.emplace(std::string(argv[i], valuePtr - argv[i]), value); + arguments.emplace(std::string(argv[i], valuePtr - argv[i]), std::make_pair(value, i)); } else { - arguments.emplace(argv[i], ""); + arguments.emplace(argv[i], std::make_pair(std::string(""), i)); } } return arguments; } -void BaseModelOptions::parse(Arguments& arguments) +namespace { - if (getAndDelOption(arguments, "--onnx", model)) - { - format = ModelFormat::kONNX; - } - else if (getAndDelOption(arguments, "--uff", model)) - { - format = ModelFormat::kUFF; - } - else if (getAndDelOption(arguments, "--model", model)) +std::string resolveHomeDirectoryOnLinux(std::string const& model) +{ + std::string filePath{model}; +#ifndef _WIN32 + if (filePath[0] == '~') { - format = ModelFormat::kCAFFE; + char const* home = std::getenv("HOME"); + if (home) + { + filePath.replace(0, 1, home); + } } +#endif + return filePath; } +} // namespace -void UffInput::parse(Arguments& arguments) +void BaseModelOptions::parse(Arguments& arguments) { - getAndDelOption(arguments, "--uffNHWC", NHWC); - std::vector args; - if (getAndDelRepeatedOption(arguments, "--uffInput", args)) + if (getAndDelOption(arguments, "--onnx", model)) { - for (const auto& i : args) - { - std::vector values{splitToStringVec(i, ',')}; - if (values.size() == 4) - { - nvinfer1::Dims3 dims{std::stoi(values[1]), std::stoi(values[2]), std::stoi(values[3])}; - inputs.emplace_back(values[0], dims); - } - else - { - throw std::invalid_argument(std::string("Invalid uffInput ") + i); - } - } + format = ModelFormat::kONNX; + model = resolveHomeDirectoryOnLinux(model); } } @@ -541,56 +987,66 @@ void ModelOptions::parse(Arguments& arguments) switch (baseModel.format) { - case ModelFormat::kCAFFE: + case ModelFormat::kONNX: + case ModelFormat::kANY: { - getAndDelOption(arguments, "--deploy", prototxt); break; } - case ModelFormat::kUFF: - { - uffInputs.parse(arguments); - if (uffInputs.inputs.empty()) - { - throw std::invalid_argument("Uff models require at least one input"); - } - break; } - case ModelFormat::kONNX: - break; - case ModelFormat::kANY: + + if (baseModel.format == ModelFormat::kONNX) { - if (getAndDelOption(arguments, "--deploy", prototxt)) + if (!outputs.empty()) { - baseModel.format = ModelFormat::kCAFFE; + throw std::invalid_argument("The --output flag should not be used with ONNX models."); } - break; } +} + +void getTempfileControls(Arguments& arguments, char const* argument, TempfileControlFlags& tempfileControls) +{ + std::string list; + if (!getAndDelOption(arguments, argument, list)) + { + return; } - // The --output flag should only be used with Caffe and UFF. It has no effect on ONNX. 
- std::vector outArgs; - if (getAndDelRepeatedOption(arguments, "--output", outArgs)) + std::vector controlList{splitToStringVec(list, ',')}; + for (auto const& s : controlList) { - for (const auto& o : outArgs) + auto controlAllowPair = splitNameAndValue(s); + bool allowed{false}; + int32_t offset{-1}; + + if (controlAllowPair.second.compare("allow") == 0) { - for (auto& v : splitToStringVec(o, ',')) - { - outputs.emplace_back(std::move(v)); - } + allowed = true; } - } - if (baseModel.format == ModelFormat::kCAFFE || baseModel.format == ModelFormat::kUFF) - { - if (outputs.empty()) + else if (controlAllowPair.second.compare("deny") != 0) { - throw std::invalid_argument("Caffe and Uff models require at least one output"); + throw std::invalid_argument("--tempfileControls value should be `deny` or `allow`"); } - } - else if (baseModel.format == ModelFormat::kONNX) - { - if (!outputs.empty()) + + if (controlAllowPair.first.compare("in_memory") == 0) { - throw std::invalid_argument("The --output flag should not be used with ONNX models."); + offset = static_cast(TempfileControlFlag::kALLOW_IN_MEMORY_FILES); + } + else if (controlAllowPair.first.compare("temporary") == 0) + { + offset = static_cast(TempfileControlFlag::kALLOW_TEMPORARY_FILES); + } + else + { + throw std::invalid_argument(std::string{"Unknown --tempfileControls key "} + controlAllowPair.first); + } + + if (allowed) + { + tempfileControls |= (1U << offset); + } + else + { + tempfileControls &= ~(1U << offset); } } } @@ -610,38 +1066,59 @@ void BuildOptions::parse(Arguments& arguments) getFormats(inputFormats, "--inputIOFormats"); getFormats(outputFormats, "--outputIOFormats"); - bool addedExplicitBatchFlag{false}; - getAndDelOption(arguments, "--explicitBatch", addedExplicitBatchFlag); - if (addedExplicitBatchFlag) - { - sample::gLogWarning << "--explicitBatch flag has been deprecated and has no effect!" << std::endl; - sample::gLogWarning << "Explicit batch dim is automatically enabled if input model is ONNX or if dynamic " - << "shapes are provided when the engine is built." 
<< std::endl;
-    }
-
-    bool minShapes = getShapesBuild(arguments, shapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN);
-    bool optShapes = getShapesBuild(arguments, shapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT);
-    bool maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX);
-    processShapes(shapes, minShapes, optShapes, maxShapes, false);
-    bool minShapesCalib
-        = getShapesBuild(arguments, shapesCalib, "--minShapesCalib", nvinfer1::OptProfileSelector::kMIN);
-    bool optShapesCalib
-        = getShapesBuild(arguments, shapesCalib, "--optShapesCalib", nvinfer1::OptProfileSelector::kOPT);
-    bool maxShapesCalib
-        = getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", nvinfer1::OptProfileSelector::kMAX);
-    processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, true);
+    bool getCalibProfile = getAndDelOption(arguments, "--calibProfile", calibProfile);
+    if (!getOptimizationProfiles(arguments, optProfiles, "--profile"))
+    {
+        ShapeProfile shapes;
+        bool minShapes{false}, optShapes{false}, maxShapes{false};
+        try
+        {
+            minShapes = getShapesBuild(arguments, shapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN);
+            optShapes = getShapesBuild(arguments, shapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT);
+            maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX);
+        }
+        catch (std::invalid_argument const& arg)
+        {
+            throw std::invalid_argument(arg.what()
+                + std::string(" conversion failure: failed to parse minShapes/optShapes/maxShapes. Please double check " "your input string."));
+        }
-    bool addedExplicitPrecisionFlag{false};
-    getAndDelOption(arguments, "--explicitPrecision", addedExplicitPrecisionFlag);
-    if (addedExplicitPrecisionFlag)
+        processShapes(shapes, minShapes, optShapes, maxShapes, false);
+        optProfiles.emplace_back(shapes);
+    }
+
+    if (calibProfile >= optProfiles.size())
+    {
+        throw std::invalid_argument(
+            std::string("--calibProfile must be less than the number of optimization profiles."));
+    }
+
+    BuildOptions::ShapeProfile dummyShapes;
+
+    bool remainingMinShapes = getShapesBuild(arguments, dummyShapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN);
+    bool remainingOptShapes = getShapesBuild(arguments, dummyShapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT);
+    bool remainingMaxShapes = getShapesBuild(arguments, dummyShapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX);
+    if (remainingMinShapes || remainingOptShapes || remainingMaxShapes)
     {
-        sample::gLogWarning << "--explicitPrecision flag has been deprecated and has no effect!" << std::endl;
+        throw std::invalid_argument("Multiple --minShapes/--optShapes/--maxShapes without --profile are not allowed. ");
     }
-    if (getAndDelOption(arguments, "--workspace", workspace))
+    bool minShapesCalib{false}, optShapesCalib{false}, maxShapesCalib{false};
+    try
     {
-        sample::gLogWarning << "--workspace flag has been deprecated by --memPoolSize flag." << std::endl;
+        minShapesCalib = getShapesBuild(arguments, shapesCalib, "--minShapesCalib", nvinfer1::OptProfileSelector::kMIN);
+        optShapesCalib = getShapesBuild(arguments, shapesCalib, "--optShapesCalib", nvinfer1::OptProfileSelector::kOPT);
+        maxShapesCalib = getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", nvinfer1::OptProfileSelector::kMAX);
+    }
+    catch (std::invalid_argument const& arg)
+    {
+        throw std::invalid_argument(arg.what()
+            + std::string(" conversion failure: failed to parse minShapesCalib/optShapesCalib/maxShapesCalib. 
Please " + "double check your input string.")); + } + + processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, true); std::string memPoolSizes; getAndDelOption(arguments, "--memPoolSize", memPoolSizes); @@ -650,26 +1127,47 @@ void BuildOptions::parse(Arguments& arguments) { std::string memPoolName; double memPoolSize; - std::tie(memPoolName, memPoolSize) = splitNameAndValue(memPoolSpec); + try + { + std::string strPoolSize; + std::tie(memPoolName, strPoolSize) = splitNameAndValue(memPoolSpec); + memPoolSize = stringToValue(addDefaultUnitSuffixIfNotSpecified(strPoolSize, 'M')); + } + catch (std::invalid_argument const& arg) + { + throw std::invalid_argument(arg.what() + + std::string( + " conversion failure: failed to parse --memPoolSize. Please double check your input string.")); + } + if (memPoolSize < 0) { throw std::invalid_argument(std::string("Negative memory pool size: ") + std::to_string(memPoolSize)); } if (memPoolName == "workspace") { - workspace = memPoolSize; + // use unit in MB. + workspace = memPoolSize / 1.0_MiB; } else if (memPoolName == "dlaSRAM") { - dlaSRAM = memPoolSize; + // use unit in MB. + dlaSRAM = memPoolSize / 1.0_MiB; } else if (memPoolName == "dlaLocalDRAM") { - dlaLocalDRAM = memPoolSize; + // use unit in MB. + dlaLocalDRAM = memPoolSize / 1.0_MiB; } else if (memPoolName == "dlaGlobalDRAM") { - dlaGlobalDRAM = memPoolSize; + // use unit in MB. + dlaGlobalDRAM = memPoolSize / 1.0_MiB; + } + else if (memPoolName == "tacticSharedMem") + { + // use unit in KB. + tacticSharedMem = memPoolSize / 1.0_KiB; } else if (!memPoolName.empty()) { @@ -677,8 +1175,6 @@ void BuildOptions::parse(Arguments& arguments) } } - getAndDelOption(arguments, "--maxBatch", maxBatch); - getAndDelOption(arguments, "--minTiming", minTiming); getAndDelOption(arguments, "--avgTiming", avgTiming); bool best{false}; @@ -687,16 +1183,79 @@ void BuildOptions::parse(Arguments& arguments) { int8 = true; fp16 = true; + + // BF16 only supported on Ampere+ + if (samplesCommon::getSMVersion() >= 0x0800) + { + bf16 = true; + } } getAndDelOption(arguments, "--refit", refittable); + + getAndDelOption(arguments, "--weightless", stripWeights); + getAndDelOption(arguments, "--stripWeights", stripWeights); + + bool stripAllWeights{}; + getAndDelOption(arguments, "--stripAllWeights", stripAllWeights); + if (stripAllWeights) + { + refittable = true; + stripWeights = true; + } + + // --vc and --versionCompatible are synonyms + getAndDelOption(arguments, "--vc", versionCompatible); + if (!versionCompatible) + { + getAndDelOption(arguments, "--versionCompatible", versionCompatible); + } + +#if !TRT_WINML + // --pi and --pluginInstanceNorm are synonyms + getAndDelOption(arguments, "--pi", pluginInstanceNorm); + if (!pluginInstanceNorm) + { + getAndDelOption(arguments, "--pluginInstanceNorm", pluginInstanceNorm); + } +#endif + + getAndDelOption(arguments, "--excludeLeanRuntime", excludeLeanRuntime); + getAndDelOption(arguments, "--noCompilationCache", disableCompilationCache); getAndDelNegOption(arguments, "--noTF32", tf32); getAndDelOption(arguments, "--fp16", fp16); + getAndDelOption(arguments, "--bf16", bf16); getAndDelOption(arguments, "--int8", int8); + getAndDelOption(arguments, "--fp8", fp8); + getAndDelOption(arguments, "--int4", int4); + getAndDelOption(arguments, "--stronglyTyped", stronglyTyped); + if (stronglyTyped) + { + auto disableAndLog = [](bool& flag, std::string mode, std::string type) { + if (flag) + { + flag = false; + sample::gLogWarning << "Invalid usage, setting " << mode 
+ << " mode is not allowed if graph is strongly typed. Disabling BuilderFlag::" + << type << "." << std::endl; + } + }; + disableAndLog(fp16, "fp16", "kFP16"); + disableAndLog(int8, "int8", "kINT8"); + disableAndLog(bf16, "bf16", "kBF16"); + disableAndLog(fp8, "fp8", "kFP8"); + disableAndLog(int4, "int4", "kINT4"); + } + + if (fp8 && int8) + { + throw std::invalid_argument("Invalid usage, fp8 and int8 aren't allowed to be enabled together."); + } getAndDelOption(arguments, "--safe", safe); - getAndDelOption(arguments, "--consistency", consistency); + getAndDelOption(arguments, "--buildDLAStandalone", buildDLAStandalone); + getAndDelOption(arguments, "--allowGPUFallback", allowGPUFallback); getAndDelOption(arguments, "--restricted", restricted); - + getAndDelOption(arguments, "--skipInference", skipInference); getAndDelOption(arguments, "--directIO", directIO); std::string precisionConstraintsString; @@ -720,10 +1279,11 @@ void BuildOptions::parse(Arguments& arguments) getLayerPrecisions(arguments, "--layerPrecisions", layerPrecisions); getLayerOutputTypes(arguments, "--layerOutputTypes", layerOutputTypes); + getLayerDeviceTypes(arguments, "--layerDeviceTypes", layerDeviceTypes); if (layerPrecisions.empty() && layerOutputTypes.empty() && precisionConstraints != PrecisionConstraints::kNONE) { - sample::gLogWarning << "When --precisionConstraints flag is set to \"obey\" or \"prefer\", please add " + sample::gLogWarning << R"(When --precisionConstraints flag is set to "obey" or "prefer", please add )" << "--layerPrecision/--layerOutputTypes flags to set layer-wise precisions and output " << "types." << std::endl; } @@ -731,79 +1291,52 @@ void BuildOptions::parse(Arguments& arguments) && precisionConstraints == PrecisionConstraints::kNONE) { sample::gLogWarning << "--layerPrecision/--layerOutputTypes flags have no effect when --precisionConstraints " - << "flag is set to \"none\"." << std::endl; + << R"(flag is set to "none".)" << std::endl; } - std::string sparsityString; - getAndDelOption(arguments, "--sparsity", sparsityString); - if (sparsityString == "disable") - { - sparsity = SparsityFlag::kDISABLE; - } - else if (sparsityString == "enable") - { - sparsity = SparsityFlag::kENABLE; - } - else if (sparsityString == "force") - { - sparsity = SparsityFlag::kFORCE; - } - else if (!sparsityString.empty()) - { - throw std::invalid_argument(std::string("Unknown sparsity mode: ") + sparsityString); - } + getStringsSet(arguments, "--markDebug", debugTensors); + + getAndDelOption(arguments, "--sparsity", sparsity); bool calibCheck = getAndDelOption(arguments, "--calib", calibration); - if (int8 && calibCheck && !shapes.empty() && shapesCalib.empty()) + if (int8 && calibCheck && !optProfiles[calibProfile].empty() && shapesCalib.empty()) { - shapesCalib = shapes; + shapesCalib = optProfiles[calibProfile]; } - - std::string profilingVerbosityString; - if (getAndDelOption(arguments, "--nvtxMode", profilingVerbosityString)) + else if (!shapesCalib.empty() && getCalibProfile) { - sample::gLogWarning << "--nvtxMode flag has been deprecated by --profilingVerbosity flag." << std::endl; + sample::gLogWarning + << "--calibProfile have no effect when --minShapesCalib/--optShapesCalib/--maxShapesCalib is set." 
+ << std::endl; } + std::string profilingVerbosityString; + getAndDelOption(arguments, "--profilingVerbosity", profilingVerbosityString); if (profilingVerbosityString == "layer_names_only") { -#if (NV_TENSORRT_MAJOR > 7) profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; -#else - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDEFAULT; -#endif } else if (profilingVerbosityString == "none") { profilingVerbosity = nvinfer1::ProfilingVerbosity::kNONE; } -#if (NV_TENSORRT_MAJOR > 7) else if (profilingVerbosityString == "detailed") { profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; } -#endif else if (profilingVerbosityString == "default") { -#if (NV_TENSORRT_MAJOR > 7) sample::gLogWarning << "--profilingVerbosity=default has been deprecated by " "--profilingVerbosity=layer_names_only." << std::endl; profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; -#else - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDEFAULT; -#endif } else if (profilingVerbosityString == "verbose") { -#if (NV_TENSORRT_MAJOR > 7) sample::gLogWarning << "--profilingVerbosity=verbose has been deprecated by --profilingVerbosity=detailed." << std::endl; profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; -#else - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDEFAULT; -#endif } else if (!profilingVerbosityString.empty()) { @@ -814,6 +1347,8 @@ void BuildOptions::parse(Arguments& arguments) { load = true; } + getAndDelOption(arguments, "--getPlanVersionOnly", getPlanVersionOnly); + if (getAndDelOption(arguments, "--saveEngine", engine)) { save = true; @@ -858,12 +1393,18 @@ void BuildOptions::parse(Arguments& arguments) { source = nvinfer1::TacticSource::kCUBLAS_LT; } -#if (NV_TENSORRT_MAJOR > 7) else if (t == "CUDNN") { source = nvinfer1::TacticSource::kCUDNN; } -#endif + else if (t == "EDGE_MASK_CONVOLUTIONS") + { + source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS; + } + else if (t == "JIT_CONVOLUTIONS") + { + source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS; + } else { throw std::invalid_argument(std::string("Unknown tactic source: ") + t); @@ -887,38 +1428,179 @@ void BuildOptions::parse(Arguments& arguments) } } - bool noBuilderCache{false}; - getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); - getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); - if (noBuilderCache) + bool noBuilderCache{false}; + getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); + getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); + if (noBuilderCache) + { + timingCacheMode = TimingCacheMode::kDISABLE; + } + else if (!timingCacheFile.empty()) + { + timingCacheMode = TimingCacheMode::kGLOBAL; + } + else + { + timingCacheMode = TimingCacheMode::kLOCAL; + } + getAndDelOption(arguments, "--errorOnTimingCacheMiss", errorOnTimingCacheMiss); + getAndDelOption(arguments, "--builderOptimizationLevel", builderOptimizationLevel); + getAndDelOption(arguments, "--maxTactics", maxTactics); + + std::string runtimePlatformArgs; + getAndDelOption(arguments, "--runtimePlatform", runtimePlatformArgs); + if (runtimePlatformArgs == "SameAsBuild" || runtimePlatformArgs.empty()) + { + runtimePlatform = RuntimePlatform::kSAME_AS_BUILD; + } + else if (runtimePlatformArgs == "WindowsAMD64") + { + runtimePlatform = RuntimePlatform::kWINDOWS_AMD64; + } + else + { + throw std::invalid_argument(std::string("Unknown runtime platform: ") + runtimePlatformArgs + + ". 
Valid options: SameAsBuild, WindowsAMD64."); + } + + std::string hardwareCompatibleArgs; + getAndDelOption(arguments, "--hardwareCompatibilityLevel", hardwareCompatibleArgs); + if (hardwareCompatibleArgs == "none" || hardwareCompatibleArgs.empty()) + { + hardwareCompatibilityLevel = HardwareCompatibilityLevel::kNONE; + } + else if (samplesCommon::toLower(hardwareCompatibleArgs) == "ampere+") + { + hardwareCompatibilityLevel = HardwareCompatibilityLevel::kAMPERE_PLUS; + } + else + { + throw std::invalid_argument(std::string("Unknown hardwareCompatibilityLevel: ") + hardwareCompatibleArgs + + ". Valid options: none, ampere+."); + } + + if (pluginInstanceNorm && (versionCompatible || hardwareCompatibilityLevel == HardwareCompatibilityLevel::kAMPERE_PLUS)) + { + throw std::invalid_argument("Plugin InstanceNorm cannot be used with version compatible or hardware compatible engines!"); + } + + getAndDelOption(arguments, "--maxAuxStreams", maxAuxStreams); + + std::string previewFeaturesBuf; + getAndDelOption(arguments, "--preview", previewFeaturesBuf); + std::vector previewFeaturesVec{splitToStringVec(previewFeaturesBuf, ',')}; + for (auto featureName : previewFeaturesVec) + { + bool enable{false}; + if (featureName.front() == '+') + { + enable = true; + } + else if (featureName.front() != '-') + { + throw std::invalid_argument( + "Preview features must be prefixed with + or -, indicating whether it should be enabled or disabled " + "respectively."); + } + featureName.erase(0, 1); + + PreviewFeature feat{}; + if (featureName == "profileSharing0806") + { + sample::gLogWarning + << "profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect." + << std::endl; + } + else if (featureName == "aliasedPluginIO1003") + { + feat = PreviewFeature::kALIASED_PLUGIN_IO_10_03; + } + else + { + throw std::invalid_argument(std::string("Unknown preview feature: ") + featureName); + } + previewFeatures[static_cast(feat)] = enable; + } + + getAndDelOption(arguments, "--tempdir", tempdir); + getTempfileControls(arguments, "--tempfileControls", tempfileControls); + + std::string runtimeMode; + getAndDelOption(arguments, "--useRuntime", runtimeMode); + if (runtimeMode == "full") { - timingCacheMode = TimingCacheMode::kDISABLE; + useRuntime = RuntimeMode::kFULL; } - else if (!timingCacheFile.empty()) + else if (runtimeMode == "dispatch") { - timingCacheMode = TimingCacheMode::kGLOBAL; + useRuntime = RuntimeMode::kDISPATCH; } - else + else if (runtimeMode == "lean") { - timingCacheMode = TimingCacheMode::kLOCAL; + useRuntime = RuntimeMode::kLEAN; + } + else if (!runtimeMode.empty()) + { + throw std::invalid_argument(std::string("Unknown useRuntime: ") + runtimeMode); } + + if ((useRuntime == RuntimeMode::kDISPATCH || useRuntime == RuntimeMode::kLEAN) && !versionCompatible) + { + versionCompatible = true; + sample::gLogWarning << "Implicitly enabling --versionCompatible since --useRuntime=" << runtimeMode + << " is set." 
<< std::endl; + } + + if (useRuntime != RuntimeMode::kFULL && !load) + { + throw std::invalid_argument(std::string("Building a TensorRT engine requires --useRuntime=full.")); + } + + getAndDelOption(arguments, "--leanDLLPath", leanDLLPath); + + // Don't delete the option because the inference option parser requires it + getOption(arguments, "--allowWeightStreaming", allowWeightStreaming); } void SystemOptions::parse(Arguments& arguments) { getAndDelOption(arguments, "--device", device); getAndDelOption(arguments, "--useDLACore", DLACore); - getAndDelOption(arguments, "--allowGPUFallback", fallback); +#if !TRT_WINML std::string pluginName; while (getAndDelOption(arguments, "--plugins", pluginName)) { + sample::gLogWarning << "--plugins flag has been deprecated, use --staticPlugins flag instead." << std::endl; plugins.emplace_back(pluginName); } + while (getAndDelOption(arguments, "--staticPlugins", pluginName)) + { + plugins.emplace_back(pluginName); + } + while (getAndDelOption(arguments, "--setPluginsToSerialize", pluginName)) + { + setPluginsToSerialize.emplace_back(pluginName); + } + while (getAndDelOption(arguments, "--dynamicPlugins", pluginName)) + { + dynamicPlugins.emplace_back(pluginName); + } + getAndDelOption(arguments, "--ignoreParsedPluginLibs", ignoreParsedPluginLibs); +#endif } +constexpr int64_t WeightStreamingBudget::kDISABLE; +constexpr int64_t WeightStreamingBudget::kAUTOMATIC; + void InferenceOptions::parse(Arguments& arguments) { - getAndDelOption(arguments, "--streams", streams); + + if (getAndDelOption(arguments, "--streams", infStreams)) + { + sample::gLogWarning << "--streams flag has been deprecated, use --infStreams flag instead." << std::endl; + } + getAndDelOption(arguments, "--infStreams", infStreams); + getAndDelOption(arguments, "--iterations", iterations); getAndDelOption(arguments, "--duration", duration); getAndDelOption(arguments, "--warmUp", warmup); @@ -935,9 +1617,9 @@ void InferenceOptions::parse(Arguments& arguments) getAndDelOption(arguments, "--threads", threads); getAndDelOption(arguments, "--useCudaGraph", graph); getAndDelOption(arguments, "--separateProfileRun", rerun); - getAndDelOption(arguments, "--buildOnly", skip); getAndDelOption(arguments, "--timeDeserialize", timeDeserialize); getAndDelOption(arguments, "--timeRefit", timeRefit); + getAndDelOption(arguments, "--persistentCacheRatio", persistentCacheRatio); std::string list; getAndDelOption(arguments, "--loadInputs", list); @@ -945,25 +1627,81 @@ void InferenceOptions::parse(Arguments& arguments) splitInsertKeyValue(inputsList, inputs); getShapesInference(arguments, shapes, "--shapes"); - getAndDelOption(arguments, "--batch", batch); + setOptProfile = getAndDelOption(arguments, "--useProfile", optProfileIndex); + + std::string allocationStrategyString; + getAndDelOption(arguments, "--allocationStrategy", allocationStrategyString); + if (allocationStrategyString == "static") + { + memoryAllocationStrategy = MemoryAllocationStrategy::kSTATIC; + } + else if (allocationStrategyString == "profile") + { + memoryAllocationStrategy = MemoryAllocationStrategy::kPROFILE; + } + else if (allocationStrategyString == "runtime") + { + memoryAllocationStrategy = MemoryAllocationStrategy::kRUNTIME; + } + else if (!allocationStrategyString.empty()) + { + throw std::invalid_argument(std::string("Unknown allocationStrategy: ") + allocationStrategyString); + } + + bool allowWs{false}; + getAndDelOption(arguments, "--allowWeightStreaming", allowWs); + bool wsBudgetFound = getAndDelOption(arguments, 
"--weightStreamingBudget", weightStreamingBudget); + if (wsBudgetFound && !allowWs) + { + throw std::invalid_argument( + "The weight streaming budget can only be set with --allowWeightStreaming specified."); + } + if (allowWs && weightStreamingBudget.isDisabled()) + { + sample::gLogWarning << "The engine can stream its weights but it will not at runtime because " + "--weightStreamingBudget unset or set to " + << WeightStreamingBudget::kDISABLE << "." << std::endl; + } + + std::string debugTensorList; + getAndDelOption(arguments, "--saveDebugTensors", debugTensorList); + std::vector fileNames{splitToStringVec(debugTensorList, ',')}; + splitInsertKeyValue(fileNames, debugTensorFileNames); } void ReportingOptions::parse(Arguments& arguments) { - getAndDelOption(arguments, "--percentile", percentile); getAndDelOption(arguments, "--avgRuns", avgs); getAndDelOption(arguments, "--verbose", verbose); getAndDelOption(arguments, "--dumpRefit", refit); getAndDelOption(arguments, "--dumpOutput", output); + getAndDelOption(arguments, "--dumpRawBindingsToFile", dumpRawBindings); getAndDelOption(arguments, "--dumpProfile", profile); getAndDelOption(arguments, "--dumpLayerInfo", layerInfo); + getAndDelOption(arguments, "--dumpOptimizationProfile", optProfileInfo); getAndDelOption(arguments, "--exportTimes", exportTimes); getAndDelOption(arguments, "--exportOutput", exportOutput); getAndDelOption(arguments, "--exportProfile", exportProfile); getAndDelOption(arguments, "--exportLayerInfo", exportLayerInfo); - if (percentile < 0 || percentile > 100) + + std::string percentileString; + getAndDelOption(arguments, "--percentile", percentileString); + std::vector percentileStrings = splitToStringVec(percentileString, ','); + if (!percentileStrings.empty()) + { + percentiles.clear(); + } + for (const auto& p : percentileStrings) { - throw std::invalid_argument(std::string("Percentile ") + std::to_string(percentile) + "is not in [0,100]"); + percentiles.push_back(stringToValue(p)); + } + + for (auto percentile : percentiles) + { + if (percentile < 0.F || percentile > 100.F) + { + throw std::invalid_argument(std::string("Percentile ") + std::to_string(percentile) + "is not in [0,100]"); + } } } @@ -983,61 +1721,40 @@ void AllOptions::parse(Arguments& arguments) system.parse(arguments); inference.parse(arguments); - // Use explicitBatch when input model is ONNX or when dynamic shapes are used. - const bool isOnnx{model.baseModel.format == ModelFormat::kONNX}; - const bool hasDynamicShapes{!build.shapes.empty() || !inference.shapes.empty()}; - const bool detectedExplicitBatch = isOnnx || hasDynamicShapes; - - // Throw an error if user tries to use --batch or --maxBatch when the engine has explicit batch dim. - const bool maxBatchWasSet{build.maxBatch != maxBatchNotProvided}; - const bool batchWasSet{inference.batch != batchNotProvided}; - if (detectedExplicitBatch && (maxBatchWasSet || batchWasSet)) + if (build.useRuntime != RuntimeMode::kFULL && inference.timeRefit) { - throw std::invalid_argument( - "The --batch and --maxBatch flags should not be used when the input model is ONNX or when dynamic shapes " - "are provided. Please use --optShapes and --shapes to set input shapes instead."); + throw std::invalid_argument("--timeRefit requires --useRuntime=full."); } - // If batch and/or maxBatch is not set and the engine has implicit batch dim, set them to default values. 
- if (!detectedExplicitBatch) + if (inference.optProfileIndex < static_cast(build.optProfiles.size())) { - // If batch is not set, set it to default value. - if (!batchWasSet) - { - inference.batch = defaultBatch; - } - // If maxBatch is not set, set it to be equal to batch. - if (!maxBatchWasSet) + // Propagate shape profile between builder and inference + for (auto const& s : build.optProfiles[inference.optProfileIndex]) { - build.maxBatch = inference.batch; + if (inference.shapes.find(s.first) == inference.shapes.end()) + { + insertShapesInference( + inference.shapes, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); + } } - // MaxBatch should not be less than batch. - if (build.maxBatch < inference.batch) + for (auto const& s : inference.shapes) { - throw std::invalid_argument("Build max batch " + std::to_string(build.maxBatch) - + " is less than inference batch " + std::to_string(inference.batch)); + if (build.optProfiles[inference.optProfileIndex].find(s.first) + == build.optProfiles[inference.optProfileIndex].end()) + { + // assume min/opt/max all the same + insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kMIN, + s.first, s.second); + insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kOPT, + s.first, s.second); + insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kMAX, + s.first, s.second); + } } } - if (build.shapes.empty() && !inference.shapes.empty()) - { - // If --shapes are provided but --optShapes are not, assume that optShapes is the same as shapes. - for (auto& s : inference.shapes) - { - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMIN, s.first, s.second); - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kOPT, s.first, s.second); - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMAX, s.first, s.second); - } - } - else if (!build.shapes.empty() && inference.shapes.empty()) - { - // If --optShapes are provided but --shapes are not, assume that shapes is the same as optShapes. - for (auto& s : build.shapes) - { - insertShapesInference( - inference.shapes, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - } - } + // Set nvtxVerbosity to be the same as build-time profilingVerbosity. + inference.nvtxVerbosity = build.profilingVerbosity; reporting.parse(arguments); helps = parseHelp(arguments); @@ -1050,31 +1767,56 @@ void AllOptions::parse(Arguments& arguments) } if (build.safe && system.DLACore >= 0) { - auto checkSafeDLAFormats = [](std::vector const& fmt) { - return fmt.empty() ? false : std::all_of(fmt.begin(), fmt.end(), [](IOFormat const& pair) { + build.buildDLAStandalone = true; + } + if (build.runtimePlatform != nvinfer1::RuntimePlatform::kSAME_AS_BUILD) + { + build.skipInference = true; + } + if (build.buildDLAStandalone) + { + build.skipInference = true; + auto checkSafeDLAFormats = [](std::vector const& fmt, bool isInput) { + return fmt.empty() ? 
false : std::all_of(fmt.begin(), fmt.end(), [&](IOFormat const& pair) { bool supported{false}; - bool const isLINEAR{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kLINEAR)}; - bool const isCHW4{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW4)}; + bool const isDLA_LINEAR{ + pair.second == 1U << static_cast(nvinfer1::TensorFormat::kDLA_LINEAR)}; + bool const isHWC4{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW4) + || pair.second == 1U << static_cast(nvinfer1::TensorFormat::kDLA_HWC4)}; bool const isCHW32{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW32)}; bool const isCHW16{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW16)}; - supported |= pair.first == nvinfer1::DataType::kINT8 && (isLINEAR || isCHW4 || isCHW32); - supported |= pair.first == nvinfer1::DataType::kHALF && (isLINEAR || isCHW4 || isCHW16); + supported |= pair.first == nvinfer1::DataType::kINT8 + && (isDLA_LINEAR || (isInput ? isHWC4 : false) || isCHW32); + supported |= pair.first == nvinfer1::DataType::kHALF + && (isDLA_LINEAR || (isInput ? isHWC4 : false) || isCHW16); return supported; }); }; - if (!checkSafeDLAFormats(build.inputFormats) || !checkSafeDLAFormats(build.outputFormats)) + if (!checkSafeDLAFormats(build.inputFormats, true) || !checkSafeDLAFormats(build.outputFormats, false)) { throw std::invalid_argument( - "I/O formats for safe DLA capability are restricted to fp16/int8:linear, fp16:chw16 or int8:chw32"); + "I/O formats for safe DLA capability are restricted to fp16/int8:dla_linear, fp16/int8:hwc4, " + "fp16:chw16 or " + "int8:chw32"); } - if (system.fallback) + if (build.allowGPUFallback) { - throw std::invalid_argument("GPU fallback (--allowGPUFallback) not allowed for safe DLA capability"); + throw std::invalid_argument("GPU fallback (--allowGPUFallback) not allowed for DLA standalone mode"); } } } } +void TaskInferenceOptions::parse(Arguments& arguments) +{ + getAndDelOption(arguments, "engine", engine); + getAndDelOption(arguments, "device", device); + getAndDelOption(arguments, "batch", batch); + getAndDelOption(arguments, "DLACore", DLACore); + getAndDelOption(arguments, "graph", graph); + getAndDelOption(arguments, "persistentCacheRatio", persistentCacheRatio); +} + void SafeBuilderOptions::parse(Arguments& arguments) { auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) { @@ -1097,13 +1839,36 @@ void SafeBuilderOptions::parse(Arguments& arguments) getFormats(outputFormats, "--outputIOFormats"); getAndDelOption(arguments, "--int8", int8); getAndDelOption(arguments, "--calib", calibFile); - getAndDelOption(arguments, "--consistency", consistency); getAndDelOption(arguments, "--std", standard); +#if !TRT_WINML std::string pluginName; while (getAndDelOption(arguments, "--plugins", pluginName)) { + sample::gLogWarning << "--plugins flag has been deprecated, use --staticPlugins flag instead." 
<< std::endl; plugins.emplace_back(pluginName); } + while (getAndDelOption(arguments, "--staticPlugins", pluginName)) + { + plugins.emplace_back(pluginName); + } +#endif + bool noBuilderCache{false}; + getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); + getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); + getAndDelOption(arguments, "--avgTiming", avgTiming); + if (noBuilderCache) + { + timingCacheMode = TimingCacheMode::kDISABLE; + } + else if (!timingCacheFile.empty()) + { + timingCacheMode = TimingCacheMode::kGLOBAL; + } + else + { + timingCacheMode = TimingCacheMode::kLOCAL; + } + getAndDelOption(arguments, "--sparsity", sparsity); } std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) @@ -1113,59 +1878,25 @@ std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) os << "Format: "; switch (options.format) { - case ModelFormat::kCAFFE: - { - os << "Caffe"; - break; - } case ModelFormat::kONNX: { os << "ONNX"; break; } - case ModelFormat::kUFF: - { - os << "UFF"; - break; - } - case ModelFormat::kANY: - os << "*"; - break; + case ModelFormat::kANY: os << "*"; break; } os << std::endl << "Model: " << options.model << std::endl; return os; } -std::ostream& operator<<(std::ostream& os, const UffInput& input) -{ - os << "Uff Inputs Layout: " << (input.NHWC ? "NHWC" : "NCHW") << std::endl; - for (const auto& i : input.inputs) - { - os << "Input: " << i.first << "," << i.second.d[0] << "," << i.second.d[1] << "," << i.second.d[2] << std::endl; - } - - return os; -} - std::ostream& operator<<(std::ostream& os, const ModelOptions& options) { os << options.baseModel; switch (options.baseModel.format) { - case ModelFormat::kCAFFE: - { - os << "Prototxt: " << options.prototxt << std::endl; - break; - } - case ModelFormat::kUFF: - { - os << options.uffInputs; - break; - } case ModelFormat::kONNX: // Fallthrough: No options to report for ONNX or the generic case - case ModelFormat::kANY: - break; + case ModelFormat::kANY: break; } os << "Output:"; @@ -1192,6 +1923,11 @@ std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) os << "fp16"; break; } + case nvinfer1::DataType::kBF16: + { + os << "bf16"; + break; + } case nvinfer1::DataType::kINT8: { os << "int8"; @@ -1207,6 +1943,26 @@ std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) os << "bool"; break; } + case nvinfer1::DataType::kUINT8: + { + os << "uint8"; + break; + } + case nvinfer1::DataType::kFP8: + { + os << "fp8"; + break; + } + case nvinfer1::DataType::kINT64: + { + os << "int64"; + break; + } + case nvinfer1::DataType::kINT4: + { + os << "int4"; + break; + } } return os; } @@ -1240,13 +1996,11 @@ std::ostream& operator<<(std::ostream& os, IOFormat const& format) os << "hwc8"; break; } -#if (NV_TENSORRT_MAJOR > 7) case nvinfer1::TensorFormat::kHWC16: { os << "hwc16"; break; } -#endif case nvinfer1::TensorFormat::kCHW4: { os << "chw4"; @@ -1277,6 +2031,11 @@ std::ostream& operator<<(std::ostream& os, IOFormat const& format) os << "hwc"; break; } + case nvinfer1::TensorFormat::kDHWC: + { + os << "dhwc"; + break; + } case nvinfer1::TensorFormat::kDLA_LINEAR: { os << "dla_linear"; @@ -1293,6 +2052,42 @@ std::ostream& operator<<(std::ostream& os, IOFormat const& format) return os; } +std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType) +{ + switch (devType) + { + case nvinfer1::DeviceType::kGPU: + { + os << "GPU"; + break; + } + case nvinfer1::DeviceType::kDLA: + { + os << "DLA"; + break; + } + } + return os; +} + 
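// [Editorial note, illustrative only, not part of the upstream patch] The DeviceType printer above, together
// with the RuntimePlatform and LayerDeviceTypes printers that follow, feed the "=== Build Options ===" summary,
// e.g. a --layerDeviceTypes=conv1:DLA entry would be reported as "conv1:DLA" and a runtimePlatform of
// kWINDOWS_AMD64 as "Runtime Platform: Windows AMD64" (the layer name here is an assumed example).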
+std::ostream& operator<<(std::ostream& os, nvinfer1::RuntimePlatform platform) +{ + switch (platform) + { + case nvinfer1::RuntimePlatform::kSAME_AS_BUILD: + { + os << "Same As Build"; + break; + } + case nvinfer1::RuntimePlatform::kWINDOWS_AMD64: + { + os << "Windows AMD64"; + break; + } + } + return os; +} + std::ostream& operator<<(std::ostream& os, const ShapeRange& dims) { int32_t i = 0; @@ -1319,29 +2114,76 @@ std::ostream& operator<<(std::ostream& os, LayerPrecisions const& layerPrecision return os; } +std::ostream& operator<<(std::ostream& os, LayerDeviceTypes const& layerDeviceTypes) +{ + int32_t i = 0; + for (auto const& layerDevicePair : layerDeviceTypes) + { + os << (i++ ? ", " : "") << layerDevicePair.first << ":" << layerDevicePair.second; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, StringSet const& stringSet) +{ + int64_t i = 0; + for (auto const& s : stringSet) + { + os << (i ? "," : "") << s; + ++i; + } + return os; +} + std::ostream& operator<<(std::ostream& os, const BuildOptions& options) { + // if loadEngine is specified, BuildOptions are N/A + if (options.load) + { + os << std::endl; + return os; + } // clang-format off os << "=== Build Options ===" << std::endl << - - "Max batch: "; printBatch(os, options.maxBatch) << std::endl << "Memory Pools: "; printMemoryPools(os, options) << std::endl << - "minTiming: " << options.minTiming << std::endl << "avgTiming: " << options.avgTiming << std::endl << "Precision: "; printPrecision(os, options) << std::endl << "LayerPrecisions: " << options.layerPrecisions << std::endl << + "Layer Device Types: " << options.layerDeviceTypes << std::endl << "Calibration: " << (options.int8 && options.calibration.empty() ? "Dynamic" : options.calibration.c_str()) << std::endl << "Refit: " << boolToEnabled(options.refittable) << std::endl << + "Strip weights: " << boolToEnabled(options.stripWeights) << std::endl << + "Version Compatible: " << boolToEnabled(options.versionCompatible) << std::endl << +#if !TRT_WINML + "ONNX Plugin InstanceNorm: " << boolToEnabled(options.pluginInstanceNorm) << std::endl << +#endif + "TensorRT runtime: " << options.useRuntime << std::endl << + "Lean DLL Path: " << options.leanDLLPath << std::endl << + "Tempfile Controls: "; printTempfileControls(os, options.tempfileControls) << std::endl << + "Exclude Lean Runtime: " << boolToEnabled(options.excludeLeanRuntime) << std::endl << "Sparsity: "; printSparsity(os, options) << std::endl << "Safe mode: " << boolToEnabled(options.safe) << std::endl << + "Build DLA standalone loadable: " << boolToEnabled(options.buildDLAStandalone) << std::endl << + "Allow GPU fallback for DLA: " << boolToEnabled(options.allowGPUFallback) << std::endl << "DirectIO mode: " << boolToEnabled(options.directIO) << std::endl << "Restricted mode: " << boolToEnabled(options.restricted) << std::endl << + "Skip inference: " << boolToEnabled(options.skipInference) << std::endl << "Save engine: " << (options.save ? options.engine : "") << std::endl << "Load engine: " << (options.load ? 
options.engine : "") << std::endl << "Profiling verbosity: " << static_cast(options.profilingVerbosity) << std::endl << "Tactic sources: "; printTacticSources(os, options.enabledTactics, options.disabledTactics) << std::endl << - "timingCacheMode: "; printTimingCache(os, options) << std::endl << - "timingCacheFile: " << options.timingCacheFile << std::endl; + "timingCacheMode: "; printTimingCache(os, options.timingCacheMode) << std::endl << + "timingCacheFile: " << options.timingCacheFile << std::endl << + "Enable Compilation Cache: "<< boolToEnabled(!options.disableCompilationCache) << std::endl << + "errorOnTimingCacheMiss: " << boolToEnabled(options.errorOnTimingCacheMiss) << std::endl << + "Preview Features: "; printPreviewFlags(os, options) << std::endl << + "MaxAuxStreams: " << options.maxAuxStreams << std::endl << + "BuilderOptimizationLevel: " << options.builderOptimizationLevel << std::endl << + "MaxTactics: " << options.maxTactics << std::endl << + "Calibration Profile Index: " << options.calibProfile << std::endl << + "Weight Streaming: " << boolToEnabled(options.allowWeightStreaming) << std::endl << + "Runtime Platform: " << options.runtimePlatform << std::endl << + "Debug Tensors: " << options.debugTensors << std::endl; // clang-format on auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) { @@ -1351,7 +2193,7 @@ std::ostream& operator<<(std::ostream& os, const BuildOptions& options) } else { - for(const auto& f : formats) + for (const auto& f : formats) { os << direction << ": " << f << std::endl; } @@ -1360,8 +2202,11 @@ std::ostream& operator<<(std::ostream& os, const BuildOptions& options) printIOFormats(os, "Input(s)", options.inputFormats); printIOFormats(os, "Output(s)", options.outputFormats); - printShapes(os, "build", options.shapes); - printShapes(os, "calibration", options.shapesCalib); + for (size_t i = 0; i < options.optProfiles.size(); i++) + { + printShapes(os, "build", options.optProfiles[i], i); + } + printShapes(os, "calibration", options.shapesCalib, -1); return os; } @@ -1372,8 +2217,8 @@ std::ostream& operator<<(std::ostream& os, const SystemOptions& options) os << "=== System Options ===" << std::endl << "Device: " << options.device << std::endl << - "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "") << - (options.DLACore != -1 && options.fallback ? "(With GPU fallback)" : "") << std::endl; + "DLACore: " << (options.DLACore != -1 ? 
std::to_string(options.DLACore) : "") << std::endl; +#if !TRT_WINML os << "Plugins:"; for (const auto& p : options.plugins) @@ -1382,13 +2227,32 @@ std::ostream& operator<<(std::ostream& os, const SystemOptions& options) } os << std::endl; + os << "setPluginsToSerialize:"; + + for (const auto& p : options.setPluginsToSerialize) + { + os << " " << p; + } + os << std::endl; + + os << "dynamicPlugins:"; + + for (const auto& p : options.dynamicPlugins) + { + os << " " << p; + } + os << std::endl; + + os << "ignoreParsedPluginLibs: " << options.ignoreParsedPluginLibs << std::endl; + os << std::endl; +#endif return os; // clang-format on } std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) { -// clang-format off + // clang-format off os << "=== Inference Options ===" << std::endl << "Batch: "; @@ -1400,48 +2264,71 @@ std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) { os << "Explicit" << std::endl; } - printShapes(os, "inference", options.shapes); - os << "Iterations: " << options.iterations << std::endl << - "Duration: " << options.duration << "s (+ " - << options.warmup << "ms warm up)" << std::endl << - "Sleep time: " << options.sleep << "ms" << std::endl << - "Idle time: " << options.idle << "ms" << std::endl << - "Streams: " << options.streams << std::endl << - "ExposeDMA: " << boolToEnabled(!options.overlap) << std::endl << - "Data transfers: " << boolToEnabled(!options.skipTransfers) << std::endl << - "Spin-wait: " << boolToEnabled(options.spin) << std::endl << - "Multithreading: " << boolToEnabled(options.threads) << std::endl << - "CUDA Graph: " << boolToEnabled(options.graph) << std::endl << - "Separate profiling: " << boolToEnabled(options.rerun) << std::endl << - "Time Deserialize: " << boolToEnabled(options.timeDeserialize) << std::endl << - "Time Refit: " << boolToEnabled(options.timeRefit) << std::endl << - "Skip inference: " << boolToEnabled(options.skip) << std::endl; - -// clang-format on + printShapes(os, "inference", options.shapes, options.optProfileIndex); + + std::string wsBudget{"Disabled"}; + if (options.weightStreamingBudget.bytes == WeightStreamingBudget::kAUTOMATIC) + { + wsBudget = "Automatic"; + } + else if (options.weightStreamingBudget.bytes != WeightStreamingBudget::kDISABLE) + { + wsBudget = std::to_string(options.weightStreamingBudget.bytes) + " bytes"; + } + else if (options.weightStreamingBudget.percent != WeightStreamingBudget::kDISABLE) + { + wsBudget = std::to_string(options.weightStreamingBudget.percent) + "%"; + } + + os << "Iterations: " << options.iterations << std::endl << + "Duration: " << options.duration << "s (+ " + << options.warmup << "ms warm up)" << std::endl << + "Sleep time: " << options.sleep << "ms" << std::endl << + "Idle time: " << options.idle << "ms" << std::endl << + "Inference Streams: " << options.infStreams << std::endl << + "ExposeDMA: " << boolToEnabled(!options.overlap) << std::endl << + "Data transfers: " << boolToEnabled(!options.skipTransfers) << std::endl << + "Spin-wait: " << boolToEnabled(options.spin) << std::endl << + "Multithreading: " << boolToEnabled(options.threads) << std::endl << + "CUDA Graph: " << boolToEnabled(options.graph) << std::endl << + "Separate profiling: " << boolToEnabled(options.rerun) << std::endl << + "Time Deserialize: " << boolToEnabled(options.timeDeserialize) << std::endl << + "Time Refit: " << boolToEnabled(options.timeRefit) << std::endl << + "NVTX verbosity: " << static_cast(options.nvtxVerbosity) << std::endl << + "Persistent Cache Ratio: " 
<< static_cast(options.persistentCacheRatio) << std::endl << + "Optimization Profile Index: "<< options.optProfileIndex << std::endl << + "Weight Streaming Budget: " << wsBudget << std::endl; + // clang-format on + os << "Inputs:" << std::endl; for (const auto& input : options.inputs) { os << input.first << "<-" << input.second << std::endl; } + os << "Debug Tensor Save Destinations:" << std::endl; + for (auto const& fileName : options.debugTensorFileNames) + { + os << fileName.first << ": " << fileName.second << std::endl; + } + return os; } std::ostream& operator<<(std::ostream& os, const ReportingOptions& options) { -// clang-format off - os << "=== Reporting Options ===" << std::endl << - - "Verbose: " << boolToEnabled(options.verbose) << std::endl << - "Averages: " << options.avgs << " inferences" << std::endl << - "Percentile: " << options.percentile << std::endl << - "Dump refittable layers:" << boolToEnabled(options.refit) << std::endl << - "Dump output: " << boolToEnabled(options.output) << std::endl << - "Profile: " << boolToEnabled(options.profile) << std::endl << - "Export timing to JSON file: " << options.exportTimes << std::endl << - "Export output to JSON file: " << options.exportOutput << std::endl << - "Export profile to JSON file: " << options.exportProfile << std::endl; -// clang-format on + // clang-format off + os << "=== Reporting Options ===" << std::endl << + "Verbose: " << boolToEnabled(options.verbose) << std::endl << + "Averages: " << options.avgs << " inferences" << std::endl << + "Percentiles: " << joinValuesToString(options.percentiles, ",") << std::endl << + "Dump refittable layers:" << boolToEnabled(options.refit) << std::endl << + "Dump output: " << boolToEnabled(options.output) << std::endl << + "Profile: " << boolToEnabled(options.profile) << std::endl << + "Export timing to JSON file: " << options.exportTimes << std::endl << + "Export output to JSON file: " << options.exportOutput << std::endl << + "Export profile to JSON file: " << options.exportProfile << std::endl; + // clang-format on return os; } @@ -1461,7 +2348,7 @@ std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) } else { - for(const auto& f : formats) + for (const auto& f : formats) { os << direction << ": " << f << std::endl; } @@ -1476,197 +2363,288 @@ std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) { os << " + INT8"; } + if (options.fp8) + { + os << " + FP8"; + } + if (options.int4) + { + os << " + INT4"; + } os << std::endl; os << "Calibration file: " << options.calibFile << std::endl; os << "Serialized Network: " << options.serialized << std::endl; printIOFormats(os, "Input(s)", options.inputFormats); printIOFormats(os, "Output(s)", options.outputFormats); - +#if !TRT_WINML os << "Plugins:"; for (const auto& p : options.plugins) { os << " " << p; } +#endif + os << "timingCacheMode: "; + printTimingCache(os, options.timingCacheMode) << std::endl; + os << "timingCacheFile: " << options.timingCacheFile << std::endl; os << std::endl; return os; } void BaseModelOptions::help(std::ostream& os) { -// clang-format off - os << " --uff= UFF model" << std::endl << - " --onnx= ONNX model" << std::endl << - " --model= Caffe model (default = no model, random weights used)" << std::endl; -// clang-format on -} - -void UffInput::help(std::ostream& os) -{ -// clang-format off - os << " --uffInput=,X,Y,Z Input blob name and its dimensions (X,Y,Z=C,H,W), it can be specified " - "multiple times; at least one is required for UFF models" << std::endl << 
- " --uffNHWC Set if inputs are in the NHWC layout instead of NCHW (use " << - "X,Y,Z=H,W,C order in --uffInput)" << std::endl; -// clang-format on + // clang-format off + os << " --onnx= ONNX model" << std::endl; + // clang-format on } void ModelOptions::help(std::ostream& os) { -// clang-format off + // clang-format off os << "=== Model Options ===" << std::endl; BaseModelOptions::help(os); - os << " --deploy= Caffe prototxt file" << std::endl << - " --output=[,]* Output names (it can be specified multiple times); at least one output " - "is required for UFF and Caffe" << std::endl; - UffInput::help(os); -// clang-format on + // clang-format on } void BuildOptions::help(std::ostream& os) { -// clang-format off - os << "=== Build Options ===" "\n" - " --maxBatch Set max batch size and build an implicit batch engine (default = same size as --batch)" "\n" - " This option should not be used when the input model is ONNX or when dynamic shapes are provided." "\n" - " --minShapes=spec Build with dynamic shapes using a profile with the min shapes provided" "\n" - " --optShapes=spec Build with dynamic shapes using a profile with the opt shapes provided" "\n" - " --maxShapes=spec Build with dynamic shapes using a profile with the max shapes provided" "\n" - " --minShapesCalib=spec Calibrate with dynamic shapes using a profile with the min shapes provided" "\n" - " --optShapesCalib=spec Calibrate with dynamic shapes using a profile with the opt shapes provided" "\n" - " --maxShapesCalib=spec Calibrate with dynamic shapes using a profile with the max shapes provided" "\n" - " Note: All three of min, opt and max shapes must be supplied." "\n" - " However, if only opt shapes is supplied then it will be expanded so" "\n" - " that min shapes and max shapes are set to the same values as opt shapes." "\n" - " Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." "\n" - " Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128" "\n" - " Each input shape is supplied as a key-value pair where key is the input name and" "\n" - " value is the dimensions (including the batch dimension) to be used for that input." "\n" - " Each key-value pair has the key and value separated using a colon (:)." "\n" - " Multiple input shapes can be provided via comma-separated key-value pairs." "\n" - " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" "\n" - " See --outputIOFormats help for the grammar of type and format list." "\n" - " Note: If this option is specified, please set comma-separated types and formats for all" "\n" - " inputs following the same order as network inputs ID (even if only one input" "\n" - " needs specifying IO format) or set the type and format once for broadcasting." "\n" - " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" "\n" - " Note: If this option is specified, please set comma-separated types and formats for all" "\n" - " outputs following the same order as network outputs ID (even if only one output" "\n" - " needs specifying IO format) or set the type and format once for broadcasting." "\n" - " IO Formats: spec ::= IOfmt[\",\"spec]" "\n" - " IOfmt ::= type:fmt" "\n" - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" "\n" - " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" "\n" - " --workspace=N Set workspace size in MiB." 
"\n" - " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s) in MiB." "\n" - " Note: Also accepts decimal sizes, e.g. 0.25MiB. Will be rounded down to the nearest integer bytes." "\n" - " Pool constraint: poolspec ::= poolfmt[\",\"poolspec]" "\n" - " poolfmt ::= pool:sizeInMiB" "\n" - " pool ::= \"workspace\"|\"dlaSRAM\"|\"dlaLocalDRAM\"|\"dlaGlobalDRAM\"" "\n" - " --profilingVerbosity=mode Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)" "\n" - " --minTiming=M Set the minimum number of iterations used in kernel selection (default = " - << defaultMinTiming << ")" "\n" - " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " - << defaultAvgTiming << ")" "\n" - " --refit Mark the engine as refittable. This will allow the inspection of refittable layers " "\n" - " and weights within the engine." "\n" - " --sparsity=spec Control sparsity (default = disabled). " "\n" - " Sparsity: spec ::= \"disable\", \"enable\", \"force\"" "\n" - " Note: Description about each of these options is as below" "\n" - " disable = do not enable sparse tactics in the builder (this is the default)" "\n" - " enable = enable sparse tactics in the builder (but these tactics will only be" "\n" - " considered if the weights have the right sparsity pattern)" "\n" - " force = enable sparse tactics in the builder and force-overwrite the weights to have" "\n" - " a sparsity pattern (even if you loaded a model yourself)" "\n" - " --noTF32 Disable tf32 precision (default is to enable tf32, in addition to fp32)" "\n" - " --fp16 Enable fp16 precision, in addition to fp32 (default = disabled)" "\n" - " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" "\n" - " --best Enable all precisions to achieve the best performance (default = disabled)" "\n" - " --directIO Avoid reformatting at network boundaries. (default = disabled)" "\n" - " --precisionConstraints=spec Control precision constraint setting. (default = none)" "\n" - " Precision Constaints: spec ::= \"none\" | \"obey\" | \"prefer\"" "\n" - " none = no constraints" "\n" - " prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible" "\n" - " obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail" "\n" - " otherwise" "\n" - " --layerPrecisions=spec Control per-layer precision constraints. Effective only when precisionConstraints is set to" "\n" - " \"obey\" or \"prefer\". (default = none)" "\n" - " The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a" "\n" - " layerName to specify the default precision for all the unspecified layers." "\n" - " Per-layer precision spec ::= layerPrecision[\",\"spec]" "\n" - " layerPrecision ::= layerName\":\"precision" "\n" - " precision ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" "\n" - " --layerOutputTypes=spec Control per-layer output type constraints. Effective only when precisionConstraints is set to" "\n" - " \"obey\" or \"prefer\". (default = none)" "\n" - " The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a" "\n" - " layerName to specify the default precision for all the unspecified layers. If a layer has more than""\n" - " one output, then multiple types separated by \"+\" can be provided for this layer." 
"\n" - " Per-layer output type spec ::= layerOutputTypes[\",\"spec]" "\n" - " layerOutputTypes ::= layerName\":\"type" "\n" - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"[\"+\"type]" "\n" - " --calib= Read INT8 calibration cache file" "\n" - " --safe Enable build safety certified engine" "\n" - " --consistency Perform consistency checking on safety certified engine" "\n" - " --restricted Enable safety scope checking with kSAFETY_SCOPE build flag" "\n" - " --saveEngine= Save the serialized engine" "\n" - " --loadEngine= Load a serialized engine" "\n" - " --tacticSources=tactics Specify the tactics to be used by adding (+) or removing (-) tactics from the default " "\n" - " tactic sources (default = all available tactics)." "\n" - " Note: Currently only cuDNN, cuBLAS and cuBLAS-LT are listed as optional tactics." "\n" - " Tactic Sources: tactics ::= [\",\"tactic]" "\n" - " tactic ::= (+|-)lib" "\n" - " lib ::= \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\"" "\n" - " For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS" "\n" - " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" "\n" - " --timingCacheFile= Save/load the serialized global timing cache" "\n" + // clang-format off + os << "=== Build Options ===" "\n" + " --minShapes=spec Build with dynamic shapes using a profile with the min shapes provided" "\n" + " --optShapes=spec Build with dynamic shapes using a profile with the opt shapes provided" "\n" + " --maxShapes=spec Build with dynamic shapes using a profile with the max shapes provided" "\n" + " --minShapesCalib=spec Calibrate with dynamic shapes using a profile with the min shapes provided" "\n" + " --optShapesCalib=spec Calibrate with dynamic shapes using a profile with the opt shapes provided" "\n" + " --maxShapesCalib=spec Calibrate with dynamic shapes using a profile with the max shapes provided" "\n" + " Note: All three of min, opt and max shapes must be supplied." "\n" + " However, if only opt shapes is supplied then it will be expanded so" "\n" + " that min shapes and max shapes are set to the same values as opt shapes." "\n" + " Input names can be wrapped with escaped single quotes (ex: 'Input:0')." "\n" + " Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128" "\n" + " For scalars (0-D shapes), use input0:scalar or simply input0: with nothing after the colon." "\n" + " Each input shape is supplied as a key-value pair where key is the input name and" "\n" + " value is the dimensions (including the batch dimension) to be used for that input." "\n" + " Each key-value pair has the key and value separated using a colon (:)." "\n" + " Multiple input shapes can be provided via comma-separated key-value pairs, and each input name can" "\n" + " contain at most one wildcard ('*') character." "\n" + " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" "\n" + " See --outputIOFormats help for the grammar of type and format list." "\n" + " Note: If this option is specified, please set comma-separated types and formats for all" "\n" + " inputs following the same order as network inputs ID (even if only one input" "\n" + " needs specifying IO format) or set the type and format once for broadcasting." 
"\n" + " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" "\n" + " Note: If this option is specified, please set comma-separated types and formats for all" "\n" + " outputs following the same order as network outputs ID (even if only one output" "\n" + " needs specifying IO format) or set the type and format once for broadcasting." "\n" + R"( IO Formats: spec ::= IOfmt[","spec])" "\n" + " IOfmt ::= type:fmt" "\n" + R"( type ::= "fp32"|"fp16"|"bf16"|"int32"|"int64"|"int8"|"uint8"|"bool")" "\n" + R"( fmt ::= ("chw"|"chw2"|"chw4"|"hwc8"|"chw16"|"chw32"|"dhwc8"|)" "\n" + R"( "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4")["+"fmt])" "\n" + " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s)" "\n" + " Supports the following base-2 suffixes: " << getAvailableUnitSuffixes() << "." "\n" + " If none of suffixes is appended, the defualt unit is in MiB." "\n" + " Note: Also accepts decimal sizes, e.g. 0.25M. Will be rounded down to the nearest integer bytes." "\n" + " In particular, for dlaSRAM the bytes will be rounded down to the nearest power of 2." "\n" + R"( Pool constraint: poolspec ::= poolfmt[","poolspec])" "\n" + " poolfmt ::= pool:size" "\n" + R"( pool ::= "workspace"|"dlaSRAM"|"dlaLocalDRAM"|"dlaGlobalDRAM"|"tacticSharedMem")" "\n" + " --profilingVerbosity=mode Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)." "\n" + " Please only assign once." "\n" + " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " + << defaultAvgTiming << ")" "\n" + " --refit Mark the engine as refittable. This will allow the inspection of refittable layers " "\n" + " and weights within the engine." "\n" + " --stripWeights Strip weights from plan. This flag works with either refit or refit with identical weights. Default""\n" + " to latter, but you can switch to the former by enabling both --stripWeights and --refit at the same""\n" + " time." "\n" + " --stripAllWeights Alias for combining the --refit and --stripWeights options. It marks all weights as refittable," "\n" + " disregarding any performance impact. Additionally, it strips all refittable weights after the " "\n" + " engine is built." "\n" + " --weightless [Deprecated] this knob has been deprecated. Please use --stripWeights" "\n" + " --versionCompatible, --vc Mark the engine as version compatible. This allows the engine to be used with newer versions" "\n" + " of TensorRT on the same host OS, as well as TensorRT's dispatch and lean runtimes." "\n" +#if !TRT_WINML + " --pluginInstanceNorm, --pi Set `kNATIVE_INSTANCENORM` to false in the ONNX parser. This will cause the ONNX parser to use" "\n" + " a plugin InstanceNorm implementation over the native implementation when parsing." "\n" +#endif + R"( --useRuntime=runtime TensorRT runtime to execute engine. "lean" and "dispatch" require loading VC engine and do)" "\n" + " not support building an engine." "\n" + R"( runtime::= "full"|"lean"|"dispatch")" "\n" + " --leanDLLPath= External lean runtime DLL to use in version compatiable mode." "\n" + " --excludeLeanRuntime When --versionCompatible is enabled, this flag indicates that the generated engine should" "\n" + " not include an embedded lean runtime. If this is set, the user must explicitly specify a" "\n" + " valid lean runtime to use when loading the engine." "\n" + " --sparsity=spec Control sparsity (default = disabled). 
" "\n" + R"( Sparsity: spec ::= "disable", "enable", "force")" "\n" + " Note: Description about each of these options is as below" "\n" + " disable = do not enable sparse tactics in the builder (this is the default)" "\n" + " enable = enable sparse tactics in the builder (but these tactics will only be" "\n" + " considered if the weights have the right sparsity pattern)" "\n" + " force = enable sparse tactics in the builder and force-overwrite the weights to have" "\n" + " a sparsity pattern (even if you loaded a model yourself)" "\n" + " [Deprecated] this knob has been deprecated." "\n" + " Please use to rewrite the weights." "\n" + " --noTF32 Disable tf32 precision (default is to enable tf32, in addition to fp32)" "\n" + " --fp16 Enable fp16 precision, in addition to fp32 (default = disabled)" "\n" + " --bf16 Enable bf16 precision, in addition to fp32 (default = disabled)" "\n" + " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" "\n" + " --fp8 Enable fp8 precision, in addition to fp32 (default = disabled)" "\n" + " --int4 Enable int4 precision, in addition to fp32 (default = disabled)" "\n" + " --best Enable all precisions to achieve the best performance (default = disabled)" "\n" + " --stronglyTyped Create a strongly typed network. (default = disabled)" "\n" + " --directIO Avoid reformatting at network boundaries. (default = disabled)" "\n" + " --precisionConstraints=spec Control precision constraint setting. (default = none)" "\n" + R"( Precision Constraints: spec ::= "none" | "obey" | "prefer")" "\n" + " none = no constraints" "\n" + " prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible" "\n" + " obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail" "\n" + " otherwise" "\n" + " --layerPrecisions=spec Control per-layer precision constraints. Effective only when precisionConstraints is set to" "\n" + R"( "obey" or "prefer". (default = none))" "\n" + R"( The specs are read left-to-right, and later ones override earlier ones. Each layer name can)" "\n" + " contain at most one wildcard ('*') character." "\n" + R"( Per-layer precision spec ::= layerPrecision[","spec])" "\n" + R"( layerPrecision ::= layerName":"precision)" "\n" + R"( precision ::= "fp32"|"fp16"|"bf16"|"int32"|"int8")" "\n" + " --layerOutputTypes=spec Control per-layer output type constraints. Effective only when precisionConstraints is set to" "\n" + R"( "obey" or "prefer". (default = none)" "\n" + R"( The specs are read left-to-right, and later ones override earlier ones. Each layer name can)" "\n" + " contain at most one wildcard ('*') character. If a layer has more than" "\n" + R"( one output, then multiple types separated by "+" can be provided for this layer.)" "\n" + R"( Per-layer output type spec ::= layerOutputTypes[","spec])" "\n" + R"( layerOutputTypes ::= layerName":"type)" "\n" + R"( type ::= "fp32"|"fp16"|"bf16"|"int32"|"int8"["+"type])" "\n" + " --layerDeviceTypes=spec Specify layer-specific device type." "\n" + " The specs are read left-to-right, and later ones override earlier ones. If a layer does not have" "\n" + " a device type specified, the layer will opt for the default device type." 
"\n" + R"( Per-layer device type spec ::= layerDeviceTypePair[","spec])" "\n" + R"( layerDeviceTypePair ::= layerName":"deviceType)" "\n" + R"( deviceType ::= "GPU"|"DLA")" "\n" + " --calib= Read INT8 calibration cache file" "\n" + " --safe Enable build safety certified engine, if DLA is enable, --buildDLAStandalone will be specified" "\n" + " automatically (default = disabled)" "\n" + " --buildDLAStandalone Enable build DLA standalone loadable which can be loaded by cuDLA, when this option is enabled, " "\n" + " --allowGPUFallback is disallowed and --skipInference is enabled by default. Additionally, " "\n" + " specifying --inputIOFormats and --outputIOFormats restricts I/O data type and memory layout" "\n" + " (default = disabled)" "\n" + " --allowGPUFallback When DLA is enabled, allow GPU fallback for unsupported layers (default = disabled)" "\n" + " --restricted Enable safety scope checking with kSAFETY_SCOPE build flag" "\n" + " --saveEngine= Save the serialized engine" "\n" + " --loadEngine= Load a serialized engine" "\n" + " --getPlanVersionOnly Print TensorRT version when loaded plan was created. Works without deserialization of the plan." "\n" + " Use together with --loadEngine. Supported only for engines created with 8.6 and forward." "\n" + " --tacticSources=tactics Specify the tactics to be used by adding (+) or removing (-) tactics from the default " "\n" + " tactic sources (default = all available tactics)." "\n" + " Note: Currently only cuDNN, cuBLAS, cuBLAS-LT, and edge mask convolutions are listed as optional" "\n" + " tactics." "\n" + R"( Tactic Sources: tactics ::= [","tactic])" "\n" + " tactic ::= (+|-)lib" "\n" + R"( lib ::= "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS")" "\n" + R"( |"JIT_CONVOLUTIONS")" "\n" + " For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS" "\n" + " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" "\n" + " --noCompilationCache Disable Compilation cache in builder, and the cache is part of timing cache (default is to enable compilation cache)" "\n" + " --errorOnTimingCacheMiss Emit error when a tactic being timed is not present in the timing cache (default = false)" "\n" + " --timingCacheFile= Save/load the serialized global timing cache" "\n" + " --preview=features Specify preview feature to be used by adding (+) or removing (-) preview features from the default" "\n" + R"( Preview Features: features ::= [","feature])" "\n" + " feature ::= (+|-)flag" "\n" + R"( flag ::= "aliasedPluginIO1003")" "\n" + R"( |"profileSharing0806")" "\n" + " --builderOptimizationLevel Set the builder optimization level. (default is 3)" "\n" + " Higher level allows TensorRT to spend more building time for more optimization options." "\n" + " Valid values include integers from 0 to the maximum optimization level, which is currently 5." "\n" + " --maxTactics Set the maximum number of tactics to time when there is a choice of tactics. (default is -1)" "\n" + " Larger number of tactics allow TensorRT to spend more building time on evaluating tactics." "\n" + " Default value -1 means TensorRT can decide the number of tactics based on its own heuristic." "\n" + " --hardwareCompatibilityLevel=mode Make the engine file compatible with other GPU architectures. 
(default = none)" "\n" + R"( Hardware Compatibility Level: mode ::= "none" | "ampere+")" "\n" + " none = no compatibility" "\n" + " ampere+ = compatible with Ampere and newer GPUs" "\n" + " --runtimePlatform=platform Set the target platform for runtime execution. (default = SameAsBuild)" "\n" + " When this option is enabled, --skipInference is enabled by default." "\n" + R"( RuntimePlatfrom: platform ::= "SameAsBuild" | "WindowsAMD64")" "\n" + " SameAsBuild = no requirement for cross-platform compatibility." "\n" + " WindowsAMD64 = set the target platform for engine execution as Windows AMD64 system" "\n" + " --tempdir= Overrides the default temporary directory TensorRT will use when creating temporary files." "\n" + " See IRuntime::setTemporaryDirectory API documentation for more information." "\n" + " --tempfileControls=controls Controls what TensorRT is allowed to use when creating temporary executable files." "\n" + " Should be a comma-separated list with entries in the format (in_memory|temporary):(allow|deny)." "\n" + " in_memory: Controls whether TensorRT is allowed to create temporary in-memory executable files." "\n" + " temporary: Controls whether TensorRT is allowed to create temporary executable files in the" "\n" + " filesystem (in the directory given by --tempdir)." "\n" + " For example, to allow in-memory files and disallow temporary files:" "\n" + " --tempfileControls=in_memory:allow,temporary:deny" "\n" + R"( If a flag is unspecified, the default behavior is "allow".)" "\n" + " --maxAuxStreams=N Set maximum number of auxiliary streams per inference stream that TRT is allowed to use to run " "\n" + " kernels in parallel if the network contains ops that can run in parallel, with the cost of more " "\n" + " memory usage. Set this to 0 for optimal memory usage. (default = using heuristics)" "\n" + " --profile Build with dynamic shapes using a profile with the min/max/opt shapes provided. Can be specified" "\n" + " multiple times to create multiple profiles with contiguous index." "\n" + " (ex: --profile=0 --minShapes= --optShapes= --maxShapes= --profile=1 ...)" "\n" + " --calibProfile Select the optimization profile to calibrate by index. (default = " + << defaultOptProfileIndex << ")" "\n" + " --allowWeightStreaming Enable a weight streaming engine. Must be specified with --stronglyTyped. TensorRT will disable" "\n" + " weight streaming at runtime unless --weightStreamingBudget is specified." "\n" + " --markDebug Specify list of names of tensors to be marked as debug tensors. 
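// [Editor's note] Illustrative sketch, not part of the diff above. --tempfileControls takes
// comma-separated "(in_memory|temporary):(allow|deny)" entries, with unspecified flags
// defaulting to allow (see getTempfileControlDefaults() in sampleOptions.h). Folding one
// well-formed entry into a flag bitmask might look like this; TempfileControlFlagSketch is
// a stand-in, not the real nvinfer1::TempfileControlFlag enum:
#include <cstdint>
#include <string>

enum class TempfileControlFlagSketch : int32_t { kALLOW_IN_MEMORY_FILES = 0, kALLOW_TEMPORARY_FILES = 1 };

void applyTempfileControlSketch(uint32_t& flags, std::string const& entry)
{
    auto const colon = entry.find(':');                    // e.g. "temporary:deny"
    std::string const what = entry.substr(0, colon);       // "in_memory" or "temporary"
    bool const allow = entry.substr(colon + 1) == "allow";
    auto const flag = (what == "in_memory") ? TempfileControlFlagSketch::kALLOW_IN_MEMORY_FILES
                                            : TempfileControlFlagSketch::kALLOW_TEMPORARY_FILES;
    uint32_t const bit = 1U << static_cast<int32_t>(flag);
    flags = allow ? (flags | bit) : (flags & ~bit);
}
// Example: --tempfileControls=in_memory:allow,temporary:deny clears only the temporary-files bit.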
Separate names with a comma" "\n" ; -// clang-format on + // clang-format on os << std::flush; } void SystemOptions::help(std::ostream& os) { -// clang-format off + // clang-format off os << "=== System Options ===" << std::endl << " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl << " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl << - " --allowGPUFallback When DLA is enabled, allow GPU fallback for unsupported layers " - "(default = disabled)" << std::endl; - os << " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl; -// clang-format on +#if TRT_WINML + std::endl; +#else + " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << + " --dynamicPlugins Plugin library (.so) to load dynamically and may be serialized with the engine if they are included in --setPluginsToSerialize (can be specified multiple times)" << std::endl << + " --setPluginsToSerialize Plugin library (.so) to be serialized with the engine (can be specified multiple times)" << std::endl << + " --ignoreParsedPluginLibs By default, when building a version-compatible engine, plugin libraries specified by the ONNX parser " << std::endl << + " are implicitly serialized with the engine (unless --excludeLeanRuntime is specified) and loaded dynamically. " << std::endl << + " Enable this flag to ignore these plugin libraries instead." << std::endl; +#endif + // clang-format on } void InferenceOptions::help(std::ostream& os) { // clang-format off os << "=== Inference Options ===" << std::endl << - " --batch=N Set batch size for implicit batch engines (default = " << defaultBatch << ")" << std::endl << - " This option should not be used when the engine is built from an ONNX model or when dynamic" << std::endl << - " shapes are provided when the engine is built." << std::endl << " --shapes=spec Set input shapes for dynamic shapes inference inputs." << std::endl << - " Note: Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." << std::endl << + R"( Note: Input names can be wrapped with escaped single quotes (ex: 'Input:0').)" << std::endl << " Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128" << std::endl << + " For scalars (0-D shapes), use input0:scalar or simply input0: with nothing after the colon."<< std::endl << " Each input shape is supplied as a key-value pair where key is the input name and" << std::endl << " value is the dimensions (including the batch dimension) to be used for that input." << std::endl << " Each key-value pair has the key and value separated using a colon (:)." << std::endl << - " Multiple input shapes can be provided via comma-separated key-value pairs." << std::endl << + " Multiple input shapes can be provided via comma-separated key-value pairs, and each input " << std::endl << + " name can contain at most one wildcard ('*') character." << std::endl << " --loadInputs=spec Load input values from files (default = generate random inputs). Input names can be " "wrapped with single quotes (ex: 'Input:0')" << std::endl << - " Input values spec ::= Ival[\",\"spec]" << std::endl << - " Ival ::= name\":\"file" << std::endl << + R"( Input values spec ::= Ival[","spec])" << std::endl << + R"( Ival ::= name":"file)" << std::endl << + " Consult the README for more information on generating files for custom inputs." 
<< std::endl << " --iterations=N Run at least N inference iterations (default = " << defaultIterations << ")" << std::endl << " --warmUp=N Run for N milliseconds to warmup before measuring performance (default = " << defaultWarmUp << ")" << std::endl << " --duration=N Run performance measurements for at least N seconds wallclock time (default = " << defaultDuration << ")" << std::endl << + " If -1 is specified, inference will keep running unless stopped manually" << std::endl << " --sleepTime=N Delay inference start with a gap of N milliseconds between launch and compute " "(default = " << defaultSleep << ")" << std::endl << " --idleTime=N Sleep N milliseconds between two continuous iterations" "(default = " << defaultIdle << ")" << std::endl << - " --streams=N Instantiate N engines to use concurrently (default = " << defaultStreams << ")" << std::endl << + " --infStreams=N Instantiate N execution contexts to run inference concurrently " + "(default = " << defaultStreams << ")" << std::endl << " --exposeDMA Serialize DMA transfers to and from device (default = disabled)." << std::endl << " --noDataTransfers Disable DMA transfers to and from device (default = enabled)." << std::endl << - " --useManagedMemory Use managed memory instead of seperate host and device allocations (default = disabled)." << std::endl << + " --useManagedMemory Use managed memory instead of separate host and device allocations (default = disabled)." << std::endl << " --useSpinWait Actively synchronize on GPU events. This option may decrease synchronization time but " "increase CPU usage and power (default = disabled)" << std::endl << " --threads Enable multithreading to drive engines with independent threads" @@ -1677,42 +2655,84 @@ void InferenceOptions::help(std::ostream& os) " --timeRefit Time the amount of time it takes to refit the engine before inference." << std::endl << " --separateProfileRun Do not attach the profiler in the benchmark run; if profiling is enabled, a second " "profile run will be executed (default = disabled)" << std::endl << - " --buildOnly Skip inference perf measurement (default = disabled)" << std::endl; + " --skipInference Exit after the engine has been built and skip inference perf measurement " + "(default = disabled)" << std::endl << + " --persistentCacheRatio Set the persistentCacheLimit in ratio, 0.5 represent half of max persistent L2 size " + "(default = 0)" << std::endl << + " --useProfile Set the optimization profile for the inference context " + "(default = " << defaultOptProfileIndex << " )." << std::endl << + " --allocationStrategy=spec Specify how the internal device memory for inference is allocated." << std::endl << + R"( Strategy: spec ::= "static", "profile", "runtime")" << std::endl << + " static = Allocate device memory based on max size across all profiles." << std::endl << + " profile = Allocate device memory based on max size of the current profile." << std::endl << + " runtime = Allocate device memory based on the actual input shapes." << std::endl << + " --saveDebugTensors Specify list of names of tensors to turn on the debug state" << std::endl << + " and filename to save raw outputs to." << std::endl << + " These tensors must be specified as debug tensors during build time." << std::endl << + R"( Input values spec ::= Ival[","spec])" << std::endl << + R"( Ival ::= name":"file)" << std::endl << + " --weightStreamingBudget Set the maximum amount of GPU memory TensorRT is allowed to use for weights." 
<< std::endl << + " It can take on the following values:" << std::endl << + " -2: (default) Disable weight streaming at runtime." << std::endl << + " -1: TensorRT will automatically decide the budget." << std::endl << + " 0-100%: Percentage of streamable weights that reside on the GPU." << std::endl << + " 0% saves the most memory but will have the worst performance." << std::endl << + " Requires the % character." << std::endl << + " >=0B: The exact amount of streamable weights that reside on the GPU. Supports the " << std::endl << + " following base-2 suffixes: " << getAvailableUnitSuffixes() << "." << std::endl; // clang-format on } void ReportingOptions::help(std::ostream& os) { -// clang-format off + // clang-format off os << "=== Reporting Options ===" << std::endl << " --verbose Use verbose logging (default = false)" << std::endl << " --avgRuns=N Report performance measurements averaged over N consecutive " "iterations (default = " << defaultAvgRuns << ")" << std::endl << - " --percentile=P Report performance for the P percentage (0<=P<=100, 0 " + " --percentile=P1,P2,P3,... Report performance for the P1,P2,P3,... percentages (0<=P_i<=100, 0 " "representing max perf, and 100 representing min perf; (default" - " = " << defaultPercentile << "%)" << std::endl << + " = " << joinValuesToString(defaultPercentiles, ",") << "%)" << std::endl << " --dumpRefit Print the refittable layers and weights from a refittable " "engine" << std::endl << " --dumpOutput Print the output tensor(s) of the last inference iteration " "(default = disabled)" << std::endl << + " --dumpRawBindingsToFile Print the input/output tensor(s) of the last inference iteration to file" + "(default = disabled)" << std::endl << " --dumpProfile Print profile information per layer (default = disabled)" << std::endl << " --dumpLayerInfo Print layer information of the engine to console " "(default = disabled)" << std::endl << + " --dumpOptimizationProfile Print the optimization profile(s) information " + "(default = disabled)" << std::endl << " --exportTimes= Write the timing results in a json file (default = disabled)" << std::endl << " --exportOutput= Write the output tensors to a json file (default = disabled)" << std::endl << " --exportProfile= Write the profile information per layer in a json file " "(default = disabled)" << std::endl << " --exportLayerInfo= Write the layer information of the engine in a json file " "(default = disabled)" << std::endl; -// clang-format on + // clang-format on +} + +void TaskInferenceOptions::help(std::ostream& os) +{ + // clang-format off + os << "=== Task Inference Options ===" << std::endl << + " engine= Specify a serialized engine for this task" << std::endl << + " device=N Specify a GPU device for this task" << std::endl << + " DLACore=N Specify a DLACore for this task" << std::endl << + " batch=N Set batch size for implicit batch engines (default = " << defaultBatch << ")" << std::endl << + " This option should not be used for explicit batch engines" << std::endl << + " graph=1 Use cuda graph for this task" << std::endl << + " persistentCacheRatio=[0-1] Set the persistentCacheLimit ratio for this task (default = 0)" << std::endl; + // clang-format on } void helpHelp(std::ostream& os) { -// clang-format off + // clang-format off os << "=== Help ===" << std::endl << " --help, -h Print this message" << std::endl; -// clang-format on + // clang-format on } void AllOptions::help(std::ostream& os) @@ -1723,19 +2743,6 @@ void AllOptions::help(std::ostream& os) os << std::endl; 
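// [Editor's note] Illustrative sketch, not part of the diff above. The --weightStreamingBudget
// help above accepts -2 (disable), -1 (automatic), an absolute byte count with base-2
// suffixes, or a percentage of the streamable weights to keep resident on the GPU. Resolving
// a percentage budget against the total streamable weight size could look like:
#include <cstdint>

int64_t resolveWeightBudgetSketch(double percent, int64_t streamableWeightBytes)
{
    // percent is expected in [0, 100]; 0% streams everything (lowest GPU memory use),
    // 100% keeps all streamable weights resident on the GPU.
    return static_cast<int64_t>(streamableWeightBytes * (percent / 100.0));
}
// Example: resolveWeightBudgetSketch(50.0, 1000000000) == 500000000 bytes kept on the GPU.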
InferenceOptions::help(os); os << std::endl; -// clang-format off - os << "=== Build and Inference Batch Options ===" << std::endl << - " When using implicit batch, the max batch size of the engine, if not given, " << std::endl << - " is set to the inference batch size;" << std::endl << - " when using explicit batch, if shapes are specified only for inference, they " << std::endl << - " will be used also as min/opt/max in the build profile; if shapes are " << std::endl << - " specified only for the build, the opt shapes will be used also for inference;" << std::endl << - " if both are specified, they must be compatible; and if explicit batch is " << std::endl << - " enabled but neither is specified, the model must provide complete static" << std::endl << - " dimensions, including batch size, for all inputs" << std::endl << - " Using ONNX models automatically forces explicit batch." << std::endl << - std::endl; - // clang-format on ReportingOptions::help(os); os << std::endl; SystemOptions::help(os); @@ -1745,7 +2752,7 @@ void AllOptions::help(std::ostream& os) void SafeBuilderOptions::printHelp(std::ostream& os) { -// clang-format off + // clang-format off os << "=== Mandatory ===" << std::endl << " --onnx= ONNX model" << std::endl << " " << std::endl << @@ -1759,20 +2766,34 @@ void SafeBuilderOptions::printHelp(std::ostream& os) " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << " outputs following the same order as network outputs ID (even if only one output" << std::endl << " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << - " IO Formats: spec ::= IOfmt[\",\"spec]" << std::endl << + R"( IO Formats: spec ::= IOfmt[","spec])" << std::endl << " IOfmt ::= type:fmt" << std::endl << - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" << std::endl << - " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" << std::endl << + R"( type ::= "fp32"|"fp16"|"int32"|"int8")" << std::endl << + R"( fmt ::= ("chw"|"chw2"|"chw4"|"hwc8"|"chw16"|"chw32"|"dhwc8"|)" << std::endl << + R"( "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4")["+"fmt])" << std::endl << " --int8 Enable int8 precision, in addition to fp16 (default = disabled)" << std::endl << - " --consistency Enable consistency check for serialized engine, (default = disabled)" << std::endl << " --std Build standard serialized engine, (default = disabled)" << std::endl << " --calib= Read INT8 calibration cache file" << std::endl << " --serialized= Save the serialized network" << std::endl << - " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl << +#if !TRT_WINML + " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << +#endif " --verbose or -v Use verbose logging (default = false)" << std::endl << " --help or -h Print this message" << std::endl << - " " << std::endl; -// clang-format on + " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" << std::endl << + " --timingCacheFile= Save/load the serialized global timing cache" << std::endl << + " --sparsity=spec Control sparsity (default = disabled). 
" << std::endl << + R"( Sparsity: spec ::= "disable", "enable", "force")" << std::endl << + " Note: Description about each of these options is as below" << std::endl << + " disable = do not enable sparse tactics in the builder (this is the default)" << std::endl << + " enable = enable sparse tactics in the builder (but these tactics will only be" << std::endl << + " considered if the weights have the right sparsity pattern)" << std::endl << + " force = enable sparse tactics in the builder and force-overwrite the weights to have" << std::endl << + " a sparsity pattern" << std::endl << + " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " << std::endl << + "" << defaultAvgTiming << ")" << std::endl << + "" << std::endl; + // clang-format on } } // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/sampleOptions.h b/src/Detector/tensorrt_yolo/common/sampleOptions.h index 8975e1ea..8ca0a655 100644 --- a/src/Detector/tensorrt_yolo/common/sampleOptions.h +++ b/src/Detector/tensorrt_yolo/common/sampleOptions.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -23,6 +24,7 @@ #include #include #include +#include #include #include @@ -32,9 +34,10 @@ namespace sample { // Build default params -constexpr int32_t maxBatchNotProvided{0}; -constexpr int32_t defaultMinTiming{1}; constexpr int32_t defaultAvgTiming{8}; +constexpr int32_t defaultMaxAuxStreams{-1}; +constexpr int32_t defaultBuilderOptimizationLevel{-1}; +constexpr int32_t defaultMaxTactics{-1}; // System default params constexpr int32_t defaultDevice{0}; @@ -44,14 +47,16 @@ constexpr int32_t defaultBatch{1}; constexpr int32_t batchNotProvided{0}; constexpr int32_t defaultStreams{1}; constexpr int32_t defaultIterations{10}; +constexpr int32_t defaultOptProfileIndex{0}; constexpr float defaultWarmUp{200.F}; constexpr float defaultDuration{3.F}; constexpr float defaultSleep{}; constexpr float defaultIdle{}; +constexpr float defaultPersistentCacheRatio{0}; // Reporting default params constexpr int32_t defaultAvgRuns{10}; -constexpr float defaultPercentile{99}; +constexpr std::array defaultPercentiles{90, 95, 99}; enum class PrecisionConstraints { @@ -63,9 +68,7 @@ enum class PrecisionConstraints enum class ModelFormat { kANY, - kCAFFE, - kONNX, - kUFF + kONNX }; enum class SparsityFlag @@ -82,7 +85,55 @@ enum class TimingCacheMode kGLOBAL }; -using Arguments = std::unordered_multimap; +enum class MemoryAllocationStrategy +{ + kSTATIC, //< Allocate device memory based on max size across all profiles. + kPROFILE, //< Allocate device memory based on max size of the current profile. + kRUNTIME, //< Allocate device memory based on the current input shapes. +}; + +//! +//! \enum RuntimeMode +//! +//! \brief Used to dictate which TensorRT runtime library to dynamically load. +//! +enum class RuntimeMode +{ + //! Maps to libnvinfer.so or nvinfer.dll + kFULL, + + //! 
Maps to libnvinfer_dispatch.so or nvinfer_dispatch.dll + kDISPATCH, + + //! Maps to libnvinfer_lean.so or nvinfer_lean.dll + kLEAN, +}; + +inline std::ostream& operator<<(std::ostream& os, RuntimeMode const mode) +{ + switch (mode) + { + case RuntimeMode::kFULL: + { + os << "full"; + break; + } + case RuntimeMode::kDISPATCH: + { + os << "dispatch"; + break; + } + case RuntimeMode::kLEAN: + { + os << "lean"; + break; + } + } + + return os; +} + +using Arguments = std::unordered_multimap>; using IOFormat = std::pair; @@ -90,135 +141,201 @@ using ShapeRange = std::array, nvinfer1::EnumMax; using LayerOutputTypes = std::unordered_map>; +using LayerDeviceTypes = std::unordered_map; -struct Options -{ - virtual void parse(Arguments& arguments) = 0; -}; +using StringSet = std::unordered_set; -struct BaseModelOptions : public Options +class WeightStreamingBudget { - ModelFormat format{ModelFormat::kANY}; - std::string model; +public: + static constexpr int64_t kDISABLE{-2}; + static constexpr int64_t kAUTOMATIC{-1}; + int64_t bytes{kDISABLE}; + double percent{static_cast(100.0)}; - void parse(Arguments& arguments) override; + bool isDisabled() + { + return bytes == kDISABLE && percent == kDISABLE; + } +}; - static void help(std::ostream& out); +class Options +{ +public: + virtual ~Options() = default; + virtual void parse(Arguments& arguments) = 0; }; -struct UffInput : public Options +class BaseModelOptions : public Options { - std::vector> inputs; - bool NHWC{false}; +public: + ModelFormat format{ModelFormat::kANY}; + std::string model; void parse(Arguments& arguments) override; static void help(std::ostream& out); }; -struct ModelOptions : public Options +class ModelOptions : public Options { +public: BaseModelOptions baseModel; std::string prototxt; std::vector outputs; - UffInput uffInputs; void parse(Arguments& arguments) override; static void help(std::ostream& out); }; -struct BuildOptions : public Options +constexpr nvinfer1::TempfileControlFlags getTempfileControlDefaults() { - int32_t maxBatch{maxBatchNotProvided}; + using F = nvinfer1::TempfileControlFlag; + return (1U << static_cast(F::kALLOW_TEMPORARY_FILES)) + | (1U << static_cast(F::kALLOW_IN_MEMORY_FILES)); +} + +class BuildOptions : public Options +{ +public: + // Unit in MB. double workspace{-1.0}; + // Unit in MB. double dlaSRAM{-1.0}; + // Unit in MB. double dlaLocalDRAM{-1.0}; + // Unit in MB. double dlaGlobalDRAM{-1.0}; - int32_t minTiming{defaultMinTiming}; + // Unit in KB. 
+ double tacticSharedMem{-1.0}; int32_t avgTiming{defaultAvgTiming}; + size_t calibProfile{defaultOptProfileIndex}; bool tf32{true}; bool fp16{false}; + bool bf16{false}; bool int8{false}; + bool fp8{false}; + bool int4{false}; + bool stronglyTyped{false}; bool directIO{false}; PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE}; LayerPrecisions layerPrecisions; LayerOutputTypes layerOutputTypes; + LayerDeviceTypes layerDeviceTypes; + StringSet debugTensors; + StringSet debugTensorStates; bool safe{false}; - bool consistency{false}; + bool buildDLAStandalone{false}; + bool allowGPUFallback{false}; bool restricted{false}; + bool skipInference{false}; bool save{false}; bool load{false}; bool refittable{false}; + bool stripWeights{false}; + bool versionCompatible{false}; + bool pluginInstanceNorm{false}; + bool excludeLeanRuntime{false}; + bool disableCompilationCache{false}; + int32_t builderOptimizationLevel{defaultBuilderOptimizationLevel}; + int32_t maxTactics{defaultMaxTactics}; SparsityFlag sparsity{SparsityFlag::kDISABLE}; -#if (NV_TENSORRT_MAJOR > 7) - nvinfer1::ProfilingVerbosity profilingVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY}; -#else - nvinfer1::ProfilingVerbosity profilingVerbosity{ nvinfer1::ProfilingVerbosity::kDEFAULT }; -#endif + nvinfer1::ProfilingVerbosity profilingVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY}; std::string engine; std::string calibration; - std::unordered_map shapes; - std::unordered_map shapesCalib; + using ShapeProfile = std::unordered_map; + std::vector optProfiles; + ShapeProfile shapesCalib; std::vector inputFormats; std::vector outputFormats; nvinfer1::TacticSources enabledTactics{0}; nvinfer1::TacticSources disabledTactics{0}; TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL}; std::string timingCacheFile{}; + bool errorOnTimingCacheMiss{false}; + // C++11 does not automatically generate hash function for enum class. + // Use int32_t to support C++11 compilers. 
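// [Editor's note] Illustrative sketch, not part of the diff above. The comment above explains
// why the preview-feature map is keyed by int32_t: C++11 does not provide std::hash for
// enum class types, so the enum value is cast to its underlying type before being used as a
// key. PreviewFeatureSketch is a stand-in, not the real nvinfer1::PreviewFeature enum:
#include <cstdint>
#include <unordered_map>

enum class PreviewFeatureSketch : int32_t { kFEATURE_A = 0, kFEATURE_B = 1 };

std::unordered_map<int32_t, bool> previewFeaturesSketch;

void setPreviewFeatureSketch(PreviewFeatureSketch feature, bool enable)
{
    // std::unordered_map<PreviewFeatureSketch, bool> fails to instantiate under strict C++11
    // because no std::hash<PreviewFeatureSketch> specialization exists; the int32_t key avoids that.
    previewFeaturesSketch[static_cast<int32_t>(feature)] = enable;
}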
+ std::unordered_map previewFeatures; + nvinfer1::HardwareCompatibilityLevel hardwareCompatibilityLevel{nvinfer1::HardwareCompatibilityLevel::kNONE}; + nvinfer1::RuntimePlatform runtimePlatform{nvinfer1::RuntimePlatform::kSAME_AS_BUILD}; + std::string tempdir{}; + nvinfer1::TempfileControlFlags tempfileControls{getTempfileControlDefaults()}; + RuntimeMode useRuntime{RuntimeMode::kFULL}; + std::string leanDLLPath{}; + int32_t maxAuxStreams{defaultMaxAuxStreams}; + bool getPlanVersionOnly{false}; + + bool allowWeightStreaming{false}; + void parse(Arguments& arguments) override; static void help(std::ostream& out); }; -struct SystemOptions : public Options +class SystemOptions : public Options { +public: int32_t device{defaultDevice}; int32_t DLACore{-1}; - bool fallback{false}; + bool ignoreParsedPluginLibs{false}; std::vector plugins; + std::vector setPluginsToSerialize; + std::vector dynamicPlugins; void parse(Arguments& arguments) override; static void help(std::ostream& out); }; -struct InferenceOptions : public Options +class InferenceOptions : public Options { +public: int32_t batch{batchNotProvided}; int32_t iterations{defaultIterations}; - int32_t streams{defaultStreams}; + int32_t infStreams{defaultStreams}; + int32_t optProfileIndex{defaultOptProfileIndex}; float warmup{defaultWarmUp}; float duration{defaultDuration}; float sleep{defaultSleep}; float idle{defaultIdle}; + float persistentCacheRatio{defaultPersistentCacheRatio}; bool overlap{true}; bool skipTransfers{false}; bool useManaged{false}; bool spin{false}; bool threads{false}; bool graph{false}; - bool skip{false}; bool rerun{false}; bool timeDeserialize{false}; bool timeRefit{false}; + bool setOptProfile{false}; std::unordered_map inputs; - std::unordered_map> shapes; + using ShapeProfile = std::unordered_map>; + ShapeProfile shapes; + nvinfer1::ProfilingVerbosity nvtxVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY}; + MemoryAllocationStrategy memoryAllocationStrategy{MemoryAllocationStrategy::kSTATIC}; + std::unordered_map debugTensorFileNames; + + WeightStreamingBudget weightStreamingBudget; void parse(Arguments& arguments) override; static void help(std::ostream& out); }; -struct ReportingOptions : public Options +class ReportingOptions : public Options { +public: bool verbose{false}; int32_t avgs{defaultAvgRuns}; - float percentile{defaultPercentile}; + std::vector percentiles{defaultPercentiles.begin(), defaultPercentiles.end()}; bool refit{false}; bool output{false}; + bool dumpRawBindings{false}; bool profile{false}; bool layerInfo{false}; + bool optProfileInfo{false}; std::string exportTimes; std::string exportOutput; std::string exportProfile; @@ -229,8 +346,9 @@ struct ReportingOptions : public Options static void help(std::ostream& out); }; -struct SafeBuilderOptions : public Options +class SafeBuilderOptions : public Options { +public: std::string serialized{}; std::string onnxModelFile{}; bool help{false}; @@ -238,18 +356,24 @@ struct SafeBuilderOptions : public Options std::vector inputFormats; std::vector outputFormats; bool int8{false}; + bool fp8{false}; + bool int4{false}; std::string calibFile{}; std::vector plugins; - bool consistency{false}; bool standard{false}; + TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL}; + std::string timingCacheFile{}; + SparsityFlag sparsity{SparsityFlag::kDISABLE}; + int32_t avgTiming{defaultAvgTiming}; void parse(Arguments& arguments) override; static void printHelp(std::ostream& out); }; -struct AllOptions : public Options +class AllOptions : public 
Options { +public: ModelOptions model; BuildOptions build; SystemOptions system; @@ -262,6 +386,20 @@ struct AllOptions : public Options static void help(std::ostream& out); }; +class TaskInferenceOptions : public Options +{ +public: + std::string engine; + int32_t device{defaultDevice}; + int32_t DLACore{-1}; + int32_t batch{batchNotProvided}; + bool graph{false}; + float persistentCacheRatio{defaultPersistentCacheRatio}; + void parse(Arguments& arguments) override; + static void help(std::ostream& out); +}; + + Arguments argsToArgumentsMap(int32_t argc, char* argv[]); bool parseHelp(Arguments& arguments); @@ -272,8 +410,6 @@ void helpHelp(std::ostream& out); std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options); -std::ostream& operator<<(std::ostream& os, const UffInput& input); - std::ostream& operator<<(std::ostream& os, const IOFormat& format); std::ostream& operator<<(std::ostream& os, const ShapeRange& dims); @@ -292,6 +428,10 @@ std::ostream& operator<<(std::ostream& os, const AllOptions& options); std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options); +std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype); + +std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType); + inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) { for (int32_t i = 0; i < dims.nbDims; ++i) @@ -329,13 +469,11 @@ inline std::ostream& operator<<(std::ostream& os, const nvinfer1::WeightsRole ro os << "Constant"; break; } -#if (NV_TENSORRT_MAJOR > 7) case nvinfer1::WeightsRole::kANY: { os << "Any"; break; } -#endif } return os; diff --git a/src/Detector/tensorrt_yolo/common/sampleReporting.cpp b/src/Detector/tensorrt_yolo/common/sampleReporting.cpp index a92938c5..e9dda6e0 100644 --- a/src/Detector/tensorrt_yolo/common/sampleReporting.cpp +++ b/src/Detector/tensorrt_yolo/common/sampleReporting.cpp @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -26,6 +27,8 @@ #include "sampleOptions.h" #include "sampleReporting.h" +using namespace nvinfer1; + namespace sample { @@ -45,7 +48,7 @@ float findPercentile(float percentile, std::vector const& timings { return std::numeric_limits::infinity(); } - if (percentile < 0.0f || percentile > 100.0f) + if (percentile < 0.F || percentile > 100.F) { throw std::runtime_error("percentile is not in [0, 100]!"); } @@ -99,8 +102,26 @@ float findCoeffOfVariance(std::vector const& timings, T const& to inline InferenceTime traceToTiming(const InferenceTrace& a) { - return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart), (a.computeEnd - a.computeStart), - (a.d2hEnd - a.d2hStart), (a.d2hEnd - a.h2dStart)); + return InferenceTime( + (a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart), (a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart)); +} + +inline std::string dimsToString(Dims const& shape) +{ + std::stringstream ss; + + if (shape.nbDims == 0) + { + ss << "scalar"; + } + else + { + for (int32_t i = 0; i < shape.nbDims; i++) + { + ss << shape.d[i] << (i != shape.nbDims - 1 ? "x" : ""); + } + } + return ss.str(); } } // namespace @@ -113,29 +134,40 @@ void printProlog(int32_t warmups, int32_t timings, float warmupMs, float benchTi void printTiming(std::vector const& timings, int32_t runsPerAvg, std::ostream& os) { - int32_t count = 0; + int64_t count = 0; InferenceTime sum; os << std::endl; os << "=== Trace details ===" << std::endl; os << "Trace averages of " << runsPerAvg << " runs:" << std::endl; - for (auto const& t : timings) + + // Show only the first N lines and the last N lines, where N = kTIMING_PRINT_THRESHOLD. + constexpr int64_t kTIMING_PRINT_THRESHOLD{200}; + int64_t const maxNbTimings{kTIMING_PRINT_THRESHOLD * runsPerAvg}; + + for (int64_t idx = 0, size = timings.size(); idx < size; ++idx) { - sum += t; + // Omit some latency printing to avoid very long logs. + if (size > 2 * maxNbTimings && idx == maxNbTimings) + { + os << "... Omitting " << (size - 2 * maxNbTimings) << " lines" << std::endl; + idx = size - kTIMING_PRINT_THRESHOLD * runsPerAvg - 1; + } + + sum += timings[idx]; if (++count == runsPerAvg) { // clang-format off os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg - << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg - << " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl; + << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (enqueue " << sum.enq / runsPerAvg + << " ms)" << std::endl; // clang-format on count = 0; sum.enq = 0; sum.h2d = 0; sum.compute = 0; sum.d2h = 0; - sum.e2e = 0; } } } @@ -166,14 +198,10 @@ void printMetricExplanations(std::ostream& os) os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H Latency. This is the latency to infer a " "single query." << std::endl; - os << "End-to-End Host Latency: the duration from when the H2D of a query is called to when the D2H of the same " - "query is completed, which includes the latency to wait for the completion of the previous query. This is " - "the latency of a query if multiple queries are enqueued consecutively." 
- << std::endl; } PerformanceResult getPerformanceResult(std::vector const& timings, - std::function metricGetter, float percentile) + std::function metricGetter, std::vector const& percentiles) { auto const metricComparator = [metricGetter](InferenceTime const& a, InferenceTime const& b) { return metricGetter(a) < metricGetter(b); }; @@ -183,40 +211,44 @@ PerformanceResult getPerformanceResult(std::vector const& timings PerformanceResult result; result.min = metricGetter(newTimings.front()); result.max = metricGetter(newTimings.back()); - result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f, metricAccumulator) / newTimings.size(); + result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0F, metricAccumulator) / newTimings.size(); result.median = findMedian(newTimings, metricGetter); - result.percentile = findPercentile(percentile, newTimings, metricGetter); + for (auto percentile : percentiles) + { + result.percentiles.emplace_back(findPercentile(percentile, newTimings, metricGetter)); + } result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean); return result; } -void printEpilog(std::vector const& timings, float walltimeMs, float percentile, int32_t batchSize, - std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) +void printEpilog(std::vector const& timings, float walltimeMs, std::vector const& percentiles, + int32_t batchSize, int32_t infStreams, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) { float const throughput = batchSize * timings.size() / walltimeMs * 1000; auto const getLatency = [](InferenceTime const& t) { return t.latency(); }; - auto const latencyResult = getPerformanceResult(timings, getLatency, percentile); - - auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; }; - auto const e2eLatencyResult = getPerformanceResult(timings, getEndToEnd, percentile); + auto const latencyResult = getPerformanceResult(timings, getLatency, percentiles); auto const getEnqueue = [](InferenceTime const& t) { return t.enq; }; - auto const enqueueResult = getPerformanceResult(timings, getEnqueue, percentile); + auto const enqueueResult = getPerformanceResult(timings, getEnqueue, percentiles); auto const getH2d = [](InferenceTime const& t) { return t.h2d; }; - auto const h2dResult = getPerformanceResult(timings, getH2d, percentile); + auto const h2dResult = getPerformanceResult(timings, getH2d, percentiles); auto const getCompute = [](InferenceTime const& t) { return t.compute; }; - auto const gpuComputeResult = getPerformanceResult(timings, getCompute, percentile); + auto const gpuComputeResult = getPerformanceResult(timings, getCompute, percentiles); auto const getD2h = [](InferenceTime const& t) { return t.d2h; }; - auto const d2hResult = getPerformanceResult(timings, getD2h, percentile); + auto const d2hResult = getPerformanceResult(timings, getD2h, percentiles); - auto const toPerfString = [percentile](const PerformanceResult& r) { + auto const toPerfString = [&](const PerformanceResult& r) { std::stringstream s; s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean << " ms, " - << "median = " << r.median << " ms, percentile(" << percentile << "%) = " << r.percentile << " ms"; + << "median = " << r.median << " ms"; + for (int32_t i = 0, n = percentiles.size(); i < n; ++i) + { + s << ", percentile(" << percentiles[i] << "%) = " << r.percentiles[i] << " ms"; + } return s.str(); }; @@ -224,7 +256,6 @@ void printEpilog(std::vector const& timings, float 
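// [Editor's note] Illustrative sketch, not part of the diff above. getPerformanceResult()
// now resolves every entry of the percentiles vector (default 90, 95, 99) against the
// sorted timings via findPercentile(); conceptually this is a nearest-rank lookup in which
// P=0 reports the fastest time ("max perf") and P=100 the slowest ("min perf"), roughly:
#include <cstddef>
#include <limits>
#include <stdexcept>
#include <vector>

float nearestRankPercentileSketch(float percentile, std::vector<float> const& sortedAscending)
{
    if (percentile < 0.F || percentile > 100.F)
    {
        throw std::runtime_error("percentile is not in [0, 100]!");
    }
    if (sortedAscending.empty())
    {
        return std::numeric_limits<float>::infinity();
    }
    std::size_t const n = sortedAscending.size();
    std::size_t const exclude = static_cast<std::size_t>((1.0 - percentile / 100.0) * n);
    std::size_t const index = exclude >= n ? 0 : n - 1 - exclude;
    return sortedAscending[index];
}
// Example: with 1000 sorted latencies, P=99 picks index 989; P=0 picks index 0 (the fastest run).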
walltimeMs, fl osInfo << "=== Performance summary ===" << std::endl; osInfo << "Throughput: " << throughput << " qps" << std::endl; osInfo << "Latency: " << toPerfString(latencyResult) << std::endl; - osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult) << std::endl; osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl; osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl; osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl; @@ -268,6 +299,13 @@ void printEpilog(std::vector const& timings, float walltimeMs, fl << "stability." << std::endl; } + // Report warnings if multiple inference streams are used. + if (infStreams > 1) + { + osWarning << "* Multiple inference streams are used. Latencies may not be accurate since inferences may run in " + << " parallel. Please use \"Throughput\" as the performance metric instead." << std::endl; + } + // Explain what the metrics mean. osInfo << "Explanations of the performance metrics are printed in the verbose logs." << std::endl; printMetricExplanations(osVerbose); @@ -275,27 +313,28 @@ void printEpilog(std::vector const& timings, float walltimeMs, fl osInfo << std::endl; } -void printPerformanceReport(std::vector const& trace, const ReportingOptions& reporting, float warmupMs, - int32_t batchSize, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) +void printPerformanceReport(std::vector const& trace, ReportingOptions const& reportingOpts, + InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) { + int32_t batchSize = infOpts.batch; + float const warmupMs = infOpts.warmup; auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { return a.computeStart >= warmupMs; }; auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup); int32_t const warmups = noWarmup - trace.begin(); float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart; - // when implicit batch used, batchSize = options.inference.batch, which is parsed through --batch - // when explicit batch used, batchSize = options.inference.batch = 0 // treat inference with explicit batch as a single query and report the throughput batchSize = batchSize ? batchSize : 1; printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo); std::vector timings(trace.size() - warmups); std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming); - printTiming(timings, reporting.avgs, osInfo); - printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo, osWarning, osVerbose); + printTiming(timings, reportingOpts.avgs, osInfo); + printEpilog( + timings, benchTime, reportingOpts.percentiles, batchSize, infOpts.infStreams, osInfo, osWarning, osVerbose); - if (!reporting.exportTimes.empty()) + if (!reportingOpts.exportTimes.empty()) { - exportJSONTrace(trace, reporting.exportTimes); + exportJSONTrace(trace, reportingOpts.exportTimes, warmups); } } @@ -303,15 +342,16 @@ void printPerformanceReport(std::vector const& trace, const Repo //! [ value, ...] //! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end h2d" : time, "start compute" : time, //! "end compute" : time, "start d2h" : time, "end d2h" : time, "h2d" : time, "compute" : time, -//! "d2h" : time, "latency" : time, "end to end" : time } +//! "d2h" : time, "latency" : time } //! 
-void exportJSONTrace(std::vector const& trace, std::string const& fileName) +void exportJSONTrace(std::vector const& trace, std::string const& fileName, int32_t const nbWarmups) { std::ofstream os(fileName, std::ofstream::trunc); os << "[" << std::endl; char const* sep = " "; - for (auto const& t : trace) + for (auto iter = trace.begin() + nbWarmups; iter < trace.end(); ++iter) { + auto const& t = *iter; InferenceTime const it(traceToTiming(t)); os << sep << "{ "; sep = ", "; @@ -321,8 +361,8 @@ void exportJSONTrace(std::vector const& trace, std::string const << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep << "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep << "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep - << "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep - << "\"endToEndMs\" : " << it.e2e << " }" << std::endl; + << "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << " }" + << std::endl; // clang-format on } os << "]" << std::endl; @@ -346,42 +386,49 @@ void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept } } - mIterator->timeMs += timeMs; + mIterator->timeMs.push_back(timeMs); ++mIterator; } void Profiler::print(std::ostream& os) const noexcept { - std::string const nameHdr("Layer"); - std::string const timeHdr(" Time (ms)"); - std::string const avgHdr(" Avg. Time (ms)"); - std::string const percentageHdr(" Time %"); + std::string const nameHdr(" Layer"); + std::string const timeHdr(" Time(ms)"); + std::string const avgHdr(" Avg.(ms)"); + std::string const medHdr(" Median(ms)"); + std::string const percentageHdr(" Time(%)"); float const totalTimeMs = getTotalTime(); - auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) { return a.name.size() < b.name.size(); }; - auto const longestName = std::max_element(mLayers.begin(), mLayers.end(), cmpLayer); - auto const nameLength = std::max(longestName->name.size() + 1, nameHdr.size()); auto const timeLength = timeHdr.size(); auto const avgLength = avgHdr.size(); + auto const medLength = medHdr.size(); auto const percentageLength = percentageHdr.size(); os << std::endl << "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl - << std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr << std::endl; + << timeHdr << avgHdr << medHdr << percentageHdr << nameHdr << std::endl; for (auto const& p : mLayers) { + if (p.timeMs.empty() || getTotalTime(p) == 0.F) + { + // there is no point to print profiling for layer that didn't run at all + continue; + } // clang-format off - os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs - << std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100 - << std::endl; + os << std::setw(timeLength) << std::fixed << std::setprecision(2) << getTotalTime(p) + << std::setw(avgLength) << std::fixed << std::setprecision(4) << getAvgTime(p) + << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime(p) + << std::setw(percentageLength) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100 + << " " << p.name << std::endl; } { - os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2) + os << std::setw(timeLength) << 
std::fixed << std::setprecision(2) << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl; + << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime() + << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 + << " Total" << std::endl; // clang-format on } os << std::endl; @@ -397,10 +444,11 @@ void Profiler::exportJSONProfile(std::string const& fileName) const noexcept for (auto const& l : mLayers) { // clang-format off - os << ", {" << " \"name\" : \"" << l.name << "\"" - ", \"timeMs\" : " << l.timeMs - << ", \"averageMs\" : " << l.timeMs / mUpdatesCount - << ", \"percentage\" : " << l.timeMs / totalTimeMs * 100 + os << ", {" << R"( "name" : ")" << l.name << R"(")" + R"(, "timeMs" : )" << getTotalTime(l) + << R"(, "averageMs" : )" << getAvgTime(l) + << R"(, "medianMs" : )" << getMedianTime(l) + << R"(, "percentage" : )" << getTotalTime(l) / totalTimeMs * 100 << " }" << std::endl; // clang-format on } @@ -415,8 +463,13 @@ void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bind void dumpOutputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os) { - os << "Output Tensors:" << std::endl; - bindings.dumpOutputs(context, os); + auto isOutput = [](Binding const& b) { return !b.isInput; }; + bindings.dumpBindings(context, isOutput, os); +} + +void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os) +{ + bindings.dumpRawBindingToFiles(context, os); } void exportJSONOutput( @@ -429,10 +482,10 @@ void exportJSONOutput( for (auto const& binding : output) { // clang-format off - os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl; + os << sep << R"({ "name" : ")" << binding.first << "\"" << std::endl; sep = ", "; - os << " " << sep << "\"dimensions\" : \""; - bindings.dumpBindingDimensions(binding.second, context, os); + os << " " << sep << R"("dimensions" : ")"; + bindings.dumpBindingDimensions(binding.first, context, os); os << "\"" << std::endl; os << " " << sep << "\"values\" : [ "; bindings.dumpBindingValues(context, binding.second, os, sep, batch); @@ -442,4 +495,115 @@ void exportJSONOutput( os << "]" << std::endl; } +void exportJSONOutput( + nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::string const& fileName, int32_t batch); + +void printLayerInfo( + ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context) +{ + if (reporting.layerInfo) + { + sample::gLogInfo << "Layer Information:" << std::endl; + sample::gLogInfo << getLayerInformation(engine, context, nvinfer1::LayerInformationFormat::kONELINE) + << std::flush; + } + if (!reporting.exportLayerInfo.empty()) + { + std::ofstream os(reporting.exportLayerInfo, std::ofstream::trunc); + os << getLayerInformation(engine, context, nvinfer1::LayerInformationFormat::kJSON) << std::flush; + } +} + +void printOptimizationProfileInfo(ReportingOptions const& reporting, nvinfer1::ICudaEngine const* engine) +{ + if (reporting.optProfileInfo) + { + sample::gLogInfo << "Optimization Profile Information:" << std::endl; + for (int32_t i = 0; i < engine->getNbOptimizationProfiles(); i++) + { + for (int32_t j = 0, e = engine->getNbIOTensors(); j < e; j++) + { + auto const tensorName = engine->getIOTensorName(j); + + if (engine->getTensorIOMode(tensorName) == 
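`exportJSONProfile` and `exportJSONOutput` switch from escaped quotes to raw string literals for the JSON keys; the emitted text is unchanged. A tiny illustration that the two spellings produce the same fragment:

```cpp
#include <iostream>

int main()
{
    float const timeMs = 1.25F;
    // Escaped form and raw-literal form print identical JSON fragments.
    std::cout << "{ \"name\" : \"conv1\", \"timeMs\" : " << timeMs << " }\n";
    std::cout << R"({ "name" : "conv1", "timeMs" : )" << timeMs << " }\n";
    return 0;
}
```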
nvinfer1::TensorIOMode::kINPUT) + { + auto tensorMinShape = engine->getProfileShape(tensorName, i, nvinfer1::OptProfileSelector::kMIN); + auto tensorOptShape = engine->getProfileShape(tensorName, i, nvinfer1::OptProfileSelector::kOPT); + auto tensorMaxShape = engine->getProfileShape(tensorName, i, nvinfer1::OptProfileSelector::kMAX); + + sample::gLogInfo << "Model input " << tensorName << " (profile " << i << "): " + << "min=" << dimsToString(tensorMinShape) + << ", opt=" << dimsToString(tensorOptShape) + << ", max=" << dimsToString(tensorMaxShape) << std::endl; + } + } + } + } +} + +void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv) +{ + if (reporting.profile) + { + iEnv.profiler->print(sample::gLogInfo); + } + if (!reporting.exportProfile.empty()) + { + iEnv.profiler->exportJSONProfile(reporting.exportProfile); + } + + // Print an warning about total per-layer latency when auxiliary streams are used. + if (!iEnv.safe && (reporting.profile || !reporting.exportProfile.empty())) + { + int32_t const nbAuxStreams = iEnv.engine.get()->getNbAuxStreams(); + if (nbAuxStreams > 0) + { + sample::gLogWarning << "The engine uses " << nbAuxStreams << " auxiliary streams, so the \"Total\" latency " + << "may not be accurate because some layers may have run in parallel!" << std::endl; + } + } +} + +namespace details +{ +void dump(std::unique_ptr const& context, std::unique_ptr const& binding, + ReportingOptions const& reporting, int32_t batch) +{ + if (!context) + { + sample::gLogError << "Empty context! Skip printing outputs." << std::endl; + return; + } + if (reporting.output) + { + dumpOutputs(*context, *binding, sample::gLogInfo); + } + if (reporting.dumpRawBindings) + { + dumpRawBindingsToFiles(*context, *binding, sample::gLogInfo); + } + if (!reporting.exportOutput.empty()) + { + exportJSONOutput(*context, *binding, reporting.exportOutput, batch); + } +} +} // namespace details + +void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch) +{ + auto const& binding = iEnv.bindings.at(0); + if (!binding) + { + sample::gLogError << "Empty bindings! Skip printing outputs." << std::endl; + return; + } + if (iEnv.safe) + { + sample::gLogError << "Safe inferernce is not supported!" << std::endl; + return; + } + auto const& context = iEnv.contexts.at(0); + details::dump(context, binding, reporting, batch); +} + } // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/sampleReporting.h b/src/Detector/tensorrt_yolo/common/sampleReporting.h index 5f730987..922ef3c8 100644 --- a/src/Detector/tensorrt_yolo/common/sampleReporting.h +++ b/src/Detector/tensorrt_yolo/common/sampleReporting.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -19,27 +20,26 @@ #include #include - -#include "NvInfer.h" +#include #include "sampleOptions.h" -#include "sampleUtils.h" namespace sample { +class Bindings; + //! //! \struct InferenceTime //! 
\brief Measurement times in milliseconds //! struct InferenceTime { - InferenceTime(float q, float i, float c, float o, float e) + InferenceTime(float q, float i, float c, float o) : enq(q) , h2d(i) , compute(c) , d2h(o) - , e2e(e) { } @@ -54,7 +54,6 @@ struct InferenceTime float h2d{0}; // Host to Device float compute{0}; // Compute float d2h{0}; // Device to Host - float e2e{0}; // end to end // ideal latency float latency() const @@ -102,7 +101,7 @@ struct InferenceTrace inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) { - return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h, a.e2e + b.e2e); + return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h); } inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) @@ -116,12 +115,12 @@ inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) //! struct PerformanceResult { - float min{0}; - float max{0}; - float mean{0}; - float median{0}; - float percentile{0}; - float coeffVar{0}; // coefficient of variation + float min{0.F}; + float max{0.F}; + float mean{0.F}; + float median{0.F}; + std::vector percentiles; + float coeffVar{0.F}; // coefficient of variation }; //! @@ -137,14 +136,14 @@ void printTiming(std::vector const& timings, int32_t runsPerAvg, //! //! \brief Print the performance summary of a trace //! -void printEpilog(std::vector const& timings, float percentile, int32_t batchSize, std::ostream& osInfo, - std::ostream& osWarning, std::ostream& osVerbose); +void printEpilog(std::vector const& timings, std::vector const& percentiles, int32_t batchSize, + std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose); //! //! \brief Get the result of a specific performance metric from a trace //! PerformanceResult getPerformanceResult(std::vector const& timings, - std::function metricGetter, float percentile); + std::function metricGetter, std::vector const& percentiles); //! //! \brief Print the explanations of the performance metrics printed in printEpilog() function. @@ -154,13 +153,14 @@ void printMetricExplanations(std::ostream& os); //! //! \brief Print and summarize a timing trace //! -void printPerformanceReport(std::vector const& trace, ReportingOptions const& reporting, float warmupMs, - int32_t batchSize, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose); +void printPerformanceReport(std::vector const& trace, ReportingOptions const& reportingOpts, + InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose); //! //! \brief Export a timing trace to JSON file //! -void exportJSONTrace(std::vector const& trace, std::string const& fileName); +void exportJSONTrace( + std::vector const& InferenceTime, std::string const& fileName, int32_t const nbWarmups); //! //! \brief Print input tensors to stream @@ -172,6 +172,8 @@ void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bind //! void dumpOutputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os); +void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os); + //! //! \brief Export output tensors to JSON file //! @@ -185,7 +187,7 @@ void exportJSONOutput( struct LayerProfile { std::string name; - float timeMs{0}; + std::vector timeMs; }; //! 
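With `LayerProfile::timeMs` turned into a `std::vector<float>` holding one sample per profiled iteration, the profiler can report a per-layer median alongside the total and average, which is far less sensitive to a single slow iteration. A standalone sketch of that aggregation; the struct and helper below are simplified stand-ins, not the repository's types:

```cpp
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

// Simplified stand-in for LayerProfile after the change: one sample per iteration.
struct LayerTimes
{
    std::string name;
    std::vector<float> timeMs;
};

float median(std::vector<float> vals)
{
    if (vals.empty())
    {
        return 0.F;
    }
    std::sort(vals.begin(), vals.end());
    size_t const mid = vals.size() / 2;
    return (vals.size() % 2 == 1) ? vals[mid] : (vals[mid - 1] + vals[mid]) * 0.5F;
}

int main()
{
    LayerTimes const conv{"conv1", {0.31F, 0.29F, 0.33F, 1.10F, 0.30F}};
    float const total = std::accumulate(conv.timeMs.begin(), conv.timeMs.end(), 0.F);
    std::cout << conv.name << ": total=" << total << " ms, avg=" << total / conv.timeMs.size()
              << " ms, median=" << median(conv.timeMs) << " ms\n"; // median ignores the 1.10 ms outlier
    return 0;
}
```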
@@ -208,8 +210,58 @@ class Profiler : public nvinfer1::IProfiler private: float getTotalTime() const noexcept { - auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs; }; - return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime); + auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) { + return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus()); + }; + return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime); + } + + float getMedianTime() const noexcept + { + if (mLayers.empty()) + { + return 0.F; + } + std::vector totalTime; + for (size_t run = 0; run < mLayers[0].timeMs.size(); ++run) + { + auto const layerTime + = [&run](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs[run]; }; + auto t = std::accumulate(mLayers.begin(), mLayers.end(), 0.F, layerTime); + totalTime.push_back(t); + } + return median(totalTime); + } + + float getMedianTime(LayerProfile const& p) const noexcept + { + return median(p.timeMs); + } + + static float median(std::vector vals) + { + if (vals.empty()) + { + return 0.F; + } + std::sort(vals.begin(), vals.end()); + if (vals.size() % 2U == 1U) + { + return vals[vals.size() / 2U]; + } + return (vals[vals.size() / 2U - 1U] + vals[vals.size() / 2U]) * 0.5F; + } + + //! return the total runtime of given layer profile + float getTotalTime(LayerProfile const& p) const noexcept + { + auto const& vals = p.timeMs; + return std::accumulate(vals.begin(), vals.end(), 0.F, std::plus()); + } + + float getAvgTime(LayerProfile const& p) const noexcept + { + return getTotalTime(p) / p.timeMs.size(); } std::vector mLayers; @@ -217,6 +269,30 @@ class Profiler : public nvinfer1::IProfiler int32_t mUpdatesCount{0}; }; +//! +//! \brief Print layer info to logger or export it to output JSON file. +//! +void printLayerInfo( + ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context); + +//! +//! \brief Print optimization profile info to logger. +//! +void printOptimizationProfileInfo(ReportingOptions const& reporting, nvinfer1::ICudaEngine const* engine); + +//! Forward declaration. +struct InferenceEnvironment; + +//! +//! \brief Print per-layer perf profile data to logger or export it to output JSON file. +//! +void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv); + +//! +//! \brief Print binding output values to logger or export them to output JSON file. +//! +void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch); + } // namespace sample #endif // TRT_SAMPLE_REPORTING_H diff --git a/src/Detector/tensorrt_yolo/common/sampleUtils.cpp b/src/Detector/tensorrt_yolo/common/sampleUtils.cpp new file mode 100644 index 00000000..689e5857 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/sampleUtils.cpp @@ -0,0 +1,587 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sampleUtils.h" +#include "bfloat16.h" +#include "half.h" + +using namespace nvinfer1; + +namespace sample +{ + +size_t dataTypeSize(nvinfer1::DataType dataType) +{ + switch (dataType) + { + case nvinfer1::DataType::kINT64: return 8U; + case nvinfer1::DataType::kINT32: + case nvinfer1::DataType::kFLOAT: return 4U; + case nvinfer1::DataType::kBF16: + case nvinfer1::DataType::kHALF: return 2U; + case nvinfer1::DataType::kBOOL: + case nvinfer1::DataType::kUINT8: + case nvinfer1::DataType::kINT8: + case nvinfer1::DataType::kFP8: return 1U; + case nvinfer1::DataType::kINT4: + ASSERT(false && "Element size is not implemented for sub-byte data-types."); + } + return 0; +} + +int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch) +{ + int64_t maxNbElems = 1; + for (int32_t i = 0; i < dims.nbDims; ++i) + { + // Get effective length of axis. + int64_t d = dims.d[i]; + // Any dimension is 0, it is an empty tensor. + if (d == 0) + { + return 0; + } + if (i == vecDim) + { + d = samplesCommon::divUp(d, comps); + } + maxNbElems = std::max(maxNbElems, d * strides.d[i]); + } + return maxNbElems * batch * (vecDim < 0 ? 1 : comps); +} + +nvinfer1::Dims toDims(std::vector const& vec) +{ + int32_t limit = static_cast(nvinfer1::Dims::MAX_DIMS); + if (static_cast(vec.size()) > limit) + { + sample::gLogWarning << "Vector too long, only first 8 elements are used in dimension." << std::endl; + } + // Pick first nvinfer1::Dims::MAX_DIMS elements + nvinfer1::Dims dims{std::min(static_cast(vec.size()), limit), {}}; + std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d)); + return dims; +} + +void loadFromFile(std::string const& fileName, char* dst, size_t size) +{ + ASSERT(dst); + + std::ifstream file(fileName, std::ios::in | std::ios::binary); + if (file.is_open()) + { + file.seekg(0, std::ios::end); + int64_t fileSize = static_cast(file.tellg()); + // Due to change from int32_t to int64_t VC engines created with earlier versions + // may expect input of the half of the size + if (fileSize != static_cast(size) && fileSize != static_cast(size * 2)) + { + std::ostringstream msg; + msg << "Unexpected file size for input file: " << fileName << ". Note: Input binding size is: " << size + << " bytes but the file size is " << fileSize + << " bytes. Double check the size and datatype of the provided data."; + throw std::invalid_argument(msg.str()); + } + // Move file pointer back to the beginning after reading file size. + file.seekg(0, std::ios::beg); + file.read(dst, size); + size_t const nbBytesRead = file.gcount(); + file.close(); + if (nbBytesRead != size) + { + std::ostringstream msg; + msg << "Unexpected file size for input file: " << fileName << ". 
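The out-of-line `volume(dims, strides, vecDim, comps, batch)` sizes a buffer from the largest extent-times-stride product over all axes, rounding the vectorized axis up to whole vectors of `comps` components. A standalone sketch of the same computation with plain `std::vector` in place of `nvinfer1::Dims`:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Buffer size for a strided, possibly vectorized tensor: the largest (extent * stride)
// over all axes, with the vectorized axis counted in whole vectors of `comps` components.
int64_t stridedVolume(std::vector<int64_t> const& dims, std::vector<int64_t> const& strides,
    int32_t vecDim, int32_t comps, int32_t batch)
{
    int64_t maxNbElems = 1;
    for (size_t i = 0; i < dims.size(); ++i)
    {
        int64_t d = dims[i];
        if (d == 0)
        {
            return 0; // any zero extent means an empty tensor
        }
        if (static_cast<int32_t>(i) == vecDim)
        {
            d = (d + comps - 1) / comps; // number of whole vectors on the vectorized axis
        }
        maxNbElems = std::max(maxNbElems, d * strides[i]);
    }
    return maxNbElems * batch * (vecDim < 0 ? 1 : comps);
}

int main()
{
    // A 3x4x4 CHW tensor stored as C/4HW4: channels are vectorized in groups of 4.
    std::vector<int64_t> const dims{3, 4, 4};
    std::vector<int64_t> const strides{16, 4, 1};
    std::cout << stridedVolume(dims, strides, /*vecDim=*/0, /*comps=*/4, /*batch=*/1) << " elements\n"; // 64
    return 0;
}
```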
Note: Expected: " << size + << " bytes but only read: " << nbBytesRead << " bytes"; + throw std::invalid_argument(msg.str()); + } + } + else + { + std::ostringstream msg; + msg << "Cannot open file " << fileName << "!"; + throw std::invalid_argument(msg.str()); + } +} + +std::vector splitToStringVec(std::string const& s, char separator, int64_t maxSplit) +{ + std::vector splitted; + + for (size_t start = 0; start < s.length();) + { + // If maxSplit is specified and we have reached maxSplit, emplace back the rest of the string and break the + // loop. + if (maxSplit >= 0 && static_cast(splitted.size()) == maxSplit) + { + splitted.emplace_back(s.substr(start, s.length() - start)); + break; + } + + size_t separatorIndex = s.find(separator, start); + if (separatorIndex == std::string::npos) + { + separatorIndex = s.length(); + } + splitted.emplace_back(s.substr(start, separatorIndex - start)); + + // If the separator is the last character, then we should push an empty string at the end. + if (separatorIndex == s.length() - 1) + { + splitted.emplace_back(""); + } + + start = separatorIndex + 1; + } + + return splitted; +} + +bool broadcastIOFormats(std::vector const& formats, size_t nbBindings, bool isInput /*= true*/) +{ + bool broadcast = formats.size() == 1; + bool validFormatsCount = broadcast || (formats.size() == nbBindings); + if (!formats.empty() && !validFormatsCount) + { + if (isInput) + { + throw std::invalid_argument( + "The number of inputIOFormats must match network's inputs or be one for broadcasting."); + } + + throw std::invalid_argument( + "The number of outputIOFormats must match network's outputs or be one for broadcasting."); + } + return broadcast; +} + +void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights) +{ + using TensorToLayer = std::unordered_map; + using LayerToTensor = std::unordered_map; + + // 1. Collect layers and tensors information from the network. + TensorToLayer matmulI2L; + TensorToLayer constO2L; + TensorToLayer shuffleI2L; + LayerToTensor shuffleL2O; + auto collectMappingInfo = [&](int32_t const idx) + { + ILayer* l = network.getLayer(idx); + switch (l->getType()) + { + case nvinfer1::LayerType::kMATRIX_MULTIPLY: + { + // assume weights on the second input. + matmulI2L.insert({l->getInput(1), l}); + break; + } + case nvinfer1::LayerType::kCONSTANT: + { + DataType const dtype = static_cast(l)->getWeights().type; + if (dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF) + { + // Sparsify float only. + constO2L.insert({l->getOutput(0), l}); + } + break; + } + case nvinfer1::LayerType::kSHUFFLE: + { + shuffleI2L.insert({l->getInput(0), l}); + shuffleL2O.insert({l, l->getOutput(0)}); + break; + } + default: break; + } + }; + int32_t const nbLayers = network.getNbLayers(); + for (int32_t i = 0; i < nbLayers; ++i) + { + collectMappingInfo(i); + } + if (matmulI2L.size() == 0 || constO2L.size() == 0) + { + // No MatrixMultiply or Constant layer found, no weights to sparsify. 
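`splitToStringVec` gains a `maxSplit` parameter so callers such as the wildcard matcher can split only on the first separator and keep the remainder intact. A simplified standalone sketch of that behaviour (it omits the real helper's trailing-separator handling):

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Split on `sep`, optionally stopping after `maxSplit` pieces and keeping the rest whole.
std::vector<std::string> split(std::string const& s, char sep, int64_t maxSplit = -1)
{
    std::vector<std::string> out;
    size_t start = 0;
    while (start < s.size())
    {
        if (maxSplit >= 0 && static_cast<int64_t>(out.size()) == maxSplit)
        {
            out.push_back(s.substr(start)); // keep the remainder as one piece
            break;
        }
        size_t pos = s.find(sep, start);
        if (pos == std::string::npos)
        {
            pos = s.size();
        }
        out.push_back(s.substr(start, pos - start));
        start = pos + 1;
    }
    return out;
}

int main()
{
    for (auto const& piece : split("fc_*_bias", '*', 1))
    {
        std::cout << '[' << piece << "] "; // [fc_] [_bias]
    }
    std::cout << '\n';
    return 0;
}
```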
+ return; + } + + // Helper for analysis + auto isTranspose + = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); }; + auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; }; + auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool + { + for (int32_t i = 0; i < dims.nbDims; ++i) + { + if (dims.d[i] != i || dims.d[i] != -1) + { + return false; + } + } + return true; + }; + auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor* + { + while (shuffleI2L.find(t) != shuffleI2L.end()) + { + nvinfer1::IShuffleLayer* s = static_cast(shuffleI2L.at(t)); + if (!is2D(s->getInput(0)->getDimensions()) || !is2D(s->getReshapeDimensions()) + || !isIdenticalReshape(s->getReshapeDimensions())) + { + break; + } + + if (isTranspose(s->getFirstTranspose())) + { + needTranspose = !needTranspose; + } + if (isTranspose(s->getSecondTranspose())) + { + needTranspose = !needTranspose; + } + + t = shuffleL2O.at(s); + } + return t; + }; + + // 2. Forward analysis to collect the Constant layers connected to MatMul via Transpose + std::unordered_map constantLayerToSparse; + for (auto& o2l : constO2L) + { + // If need to transpose the weights of the Constant layer. + // Need to transpose by default due to semantic difference. + bool needTranspose{true}; + ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose); + if (matmulI2L.find(t) == matmulI2L.end()) + { + continue; + } + + // check MatMul params... + IMatrixMultiplyLayer* mm = static_cast(matmulI2L.at(t)); + bool const twoInputs = mm->getNbInputs() == 2; + bool const all2D = is2D(mm->getInput(0)->getDimensions()) && is2D(mm->getInput(1)->getDimensions()); + bool const isSimple = mm->getOperation(0) == nvinfer1::MatrixOperation::kNONE + && mm->getOperation(1) != nvinfer1::MatrixOperation::kVECTOR; + if (!(twoInputs && all2D && isSimple)) + { + continue; + } + if (mm->getOperation(1) == nvinfer1::MatrixOperation::kTRANSPOSE) + { + needTranspose = !needTranspose; + } + + constantLayerToSparse.insert({static_cast(o2l.second), needTranspose}); + } + + // 3. Finally, sparsify the weights + auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) + { + Dims dims = layer->getOutput(0)->getDimensions(); + ASSERT(dims.nbDims == 2); + int32_t const idxN = needTranspose ? 1 : 0; + int32_t const n = dims.d[idxN]; + int32_t const k = dims.d[1 - idxN]; + sparseWeights.emplace_back(); + std::vector& spw = sparseWeights.back(); + Weights w = layer->getWeights(); + DataType const dtype = w.type; + ASSERT(dtype == nvinfer1::DataType::kFLOAT + || dtype == nvinfer1::DataType::kHALF); // non-float weights should have been ignored. 
+ + if (needTranspose) + { + if (dtype == nvinfer1::DataType::kFLOAT) + { + spw.resize(w.count * sizeof(float)); + transpose2DWeights(spw.data(), w.values, k, n); + } + else if (dtype == nvinfer1::DataType::kHALF) + { + spw.resize(w.count * sizeof(half_float::half)); + transpose2DWeights(spw.data(), w.values, k, n); + } + + w.values = spw.data(); + std::vector tmpW; + sparsify(w, n, 1, tmpW); + + if (dtype == nvinfer1::DataType::kFLOAT) + { + transpose2DWeights(spw.data(), tmpW.data(), n, k); + } + else if (dtype == nvinfer1::DataType::kHALF) + { + transpose2DWeights(spw.data(), tmpW.data(), n, k); + } + } + else + { + sparsify(w, n, 1, spw); + } + + w.values = spw.data(); + layer->setWeights(w); + }; + for (auto& l : constantLayerToSparse) + { + sparsifyConstantWeights(l.first, l.second); + } +} + +template +void setSparseWeights(L& l, int32_t k, int32_t trs, std::vector& sparseWeights) +{ + auto weights = l.getKernelWeights(); + sparsify(weights, k, trs, sparseWeights); + weights.values = sparseWeights.data(); + l.setKernelWeights(weights); +} + +// Explicit instantiation +template void setSparseWeights( + IConvolutionLayer& l, int32_t k, int32_t trs, std::vector& sparseWeights); + +void sparsify(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights) +{ + for (int32_t l = 0; l < network.getNbLayers(); ++l) + { + auto* layer = network.getLayer(l); + auto const t = layer->getType(); + if (t == nvinfer1::LayerType::kCONVOLUTION) + { + auto& conv = *static_cast(layer); + auto const& dims = conv.getKernelSizeNd(); + ASSERT(dims.nbDims == 2 || dims.nbDims == 3); + auto const k = conv.getNbOutputMaps(); + auto const trs = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies()); + sparseWeights.emplace_back(); + setSparseWeights(conv, k, trs, sparseWeights.back()); + } + } + + sparsifyMatMulKernelWeights(network, sparseWeights); + sample::gLogVerbose << "--sparsity=force pruned " << sparseWeights.size() << " weights to be sparsity pattern." << std::endl; + sample::gLogVerbose << "--sparsity=force has been deprecated. 
Please use to rewrite the weights to a sparsity pattern and then run with --sparsity=enable" << std::endl; +} + +void sparsify(Weights const& weights, int32_t k, int32_t trs, std::vector& sparseWeights) +{ + switch (weights.type) + { + case DataType::kFLOAT: + sparsify(static_cast(weights.values), weights.count, k, trs, sparseWeights); + break; + case DataType::kHALF: + sparsify(static_cast(weights.values), weights.count, k, trs, sparseWeights); + break; + case DataType::kBF16: + sparsify(static_cast(weights.values), weights.count, k, trs, sparseWeights); + break; + case DataType::kINT8: + case DataType::kINT32: + case DataType::kUINT8: + case DataType::kBOOL: + case DataType::kINT4: + case DataType::kFP8: + case DataType::kINT64: + ASSERT(false && "Unsupported data type"); + } +} + +template +void print(std::ostream& os, T v) +{ + os << v; +} + +void print(std::ostream& os, int8_t v) +{ + os << static_cast(v); +} + +void print(std::ostream& os, __half v) +{ + os << static_cast(v); +} + +template +void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv) +{ + auto const vol = volume(dims); + T const* typedBuffer = static_cast(buffer); + std::string sep; + for (int64_t v = 0; v < vol; ++v) + { + int64_t curV = v; + int32_t dataOffset = 0; + for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) + { + int32_t dimVal = curV % dims.d[dimIndex]; + if (dimIndex == vectorDim) + { + dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv; + } + else + { + dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv); + } + curV /= dims.d[dimIndex]; + ASSERT(curV >= 0); + } + + os << sep; + sep = separator; + print(os, typedBuffer[dataOffset]); + } +} + +// Explicit instantiation +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer<__half>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); + +template +void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights) +{ + auto const c = count / (k * trs); + sparseWeights.resize(count * sizeof(T)); + auto* sparseValues = reinterpret_cast(sparseWeights.data()); + + constexpr int32_t window = 4; + constexpr int32_t nonzeros = 2; + + int32_t const crs = c * trs; + auto 
const getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { return ki * crs + ci * trs + rsi; }; + + for (int64_t ki = 0; ki < k; ++ki) + { + for (int64_t rsi = 0; rsi < trs; ++rsi) + { + int32_t w = 0; + int32_t nz = 0; + for (int64_t ci = 0; ci < c; ++ci) + { + auto const index = getIndex(ki, ci, rsi); + if (nz < nonzeros) + { + sparseValues[index] = values[index]; + ++nz; + } + else + { + sparseValues[index] = 0; + } + if (++w == window) + { + w = 0; + nz = 0; + } + } + } + } +} + +// Explicit instantiation +template void sparsify( + float const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights); +template void sparsify( + half_float::half const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights); + +template +void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n) +{ + ASSERT(dst != src); + T* tdst = reinterpret_cast(dst); + T const* tsrc = reinterpret_cast(src); + for (int32_t mi = 0; mi < m; ++mi) + { + for (int32_t ni = 0; ni < n; ++ni) + { + int32_t const isrc = mi * n + ni; + int32_t const idst = ni * m + mi; + tdst[idst] = tsrc[isrc]; + } + } +} + +// Explicit instantiation +template void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n); +template void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n); + +template ::value, bool>::type> +void fillBuffer(void* buffer, int64_t volume, T min, T max) +{ + T* typedBuffer = static_cast(buffer); + std::default_random_engine engine; + std::uniform_int_distribution distribution(min, max); + auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; + std::generate(typedBuffer, typedBuffer + volume, generator); +} + +template ::value, int32_t>::type> +void fillBuffer(void* buffer, int64_t volume, T min, T max) +{ + T* typedBuffer = static_cast(buffer); + std::default_random_engine engine; + std::uniform_real_distribution distribution(min, max); + auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; + std::generate(typedBuffer, typedBuffer + volume, generator); +} + +// Explicit instantiation +template void fillBuffer(void* buffer, int64_t volume, bool min, bool max); +template void fillBuffer(void* buffer, int64_t volume, float min, float max); +template void fillBuffer(void* buffer, int64_t volume, int32_t min, int32_t max); +template void fillBuffer(void* buffer, int64_t volume, int64_t min, int64_t max); +template void fillBuffer(void* buffer, int64_t volume, int8_t min, int8_t max); +template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max); +template void fillBuffer(void* buffer, int64_t volume, BFloat16 min, BFloat16 max); +template void fillBuffer(void* buffer, int64_t volume, uint8_t min, uint8_t max); + +bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target) +{ + auto const splitPattern = splitToStringVec(pattern, '*', 1); + + // If there is no wildcard, return if the two strings match exactly. + if (splitPattern.size() == 1) + { + return pattern == target; + } + + // Otherwise, target must follow prefix+anything+postfix pattern. 
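The `sparsify` kernel above imposes the 2:4 structured-sparsity pattern expected by `--sparsity=force`: walking the channel dimension for each output map and spatial position, it keeps the first two elements of every window of four and zeroes the rest, regardless of magnitude. A standalone sketch of that pattern for a single slice with `k = trs = 1`:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Keep the first two elements of every window of four along the channel axis,
// overwrite the other two with zero (the real helper also walks K and RS).
std::vector<float> sparsify24(std::vector<float> const& values)
{
    std::vector<float> out(values.size(), 0.F);
    int32_t w = 0;  // position inside the current window of 4
    int32_t nz = 0; // non-zeros already kept in the current window
    for (size_t i = 0; i < values.size(); ++i)
    {
        if (nz < 2)
        {
            out[i] = values[i];
            ++nz;
        }
        if (++w == 4)
        {
            w = 0;
            nz = 0;
        }
    }
    return out;
}

int main()
{
    std::vector<float> const weights{0.5F, -1.2F, 0.7F, 0.3F, 2.0F, -0.4F, 0.1F, 0.9F};
    for (float v : sparsify24(weights))
    {
        std::cout << v << ' '; // 0.5 -1.2 0 0 2 -0.4 0 0
    }
    std::cout << '\n';
    return 0;
}
```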
+ return target.size() >= (splitPattern[0].size() + splitPattern[1].size()) && target.find(splitPattern[0]) == 0 + && target.rfind(splitPattern[1]) == (target.size() - splitPattern[1].size()); +} + +} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/sampleUtils.h b/src/Detector/tensorrt_yolo/common/sampleUtils.h index 1509a7fc..6cd4280b 100644 --- a/src/Detector/tensorrt_yolo/common/sampleUtils.h +++ b/src/Detector/tensorrt_yolo/common/sampleUtils.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -32,24 +34,20 @@ #include "common.h" #include "logger.h" -#include "sampleDevice.h" -#include "sampleOptions.h" + +#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err) \ + { \ + if ((condition) == false) \ + { \ + (err) << (msg) << std::endl; \ + return retval; \ + } \ + } namespace sample { -inline int dataTypeSize(nvinfer1::DataType dataType) -{ - switch (dataType) - { - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kFLOAT: return 4; - case nvinfer1::DataType::kHALF: return 2; - case nvinfer1::DataType::kBOOL: - case nvinfer1::DataType::kINT8: return 1; - } - return 0; -} +size_t dataTypeSize(nvinfer1::DataType dataType); template inline T roundUp(T m, T n) @@ -57,485 +55,71 @@ inline T roundUp(T m, T n) return ((m + n - 1) / n) * n; } -inline int volume(const nvinfer1::Dims& d) -{ - return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); -} - //! comps is the number of components in a vector. Ignored if vecDim < 0. -inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides, int vecDim, int comps, int batch) -{ - int maxNbElems = 1; - for (int i = 0; i < dims.nbDims; ++i) - { - // Get effective length of axis. - int d = dims.d[i]; - // Any dimension is 0, it is an empty tensor. - if (d == 0) - { - return 0; - } - if (i == vecDim) - { - d = samplesCommon::divUp(d, comps); - } - maxNbElems = std::max(maxNbElems, d * strides.d[i]); - } - return static_cast(maxNbElems) * batch * (vecDim < 0 ? 1 : comps); -} +int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch); -inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) -{ - if (vecDim != -1) - { - dims.d[vecDim] = roundUp(dims.d[vecDim], comps); - } - return volume(dims) * std::max(batch, 1); -} +using samplesCommon::volume; -inline nvinfer1::Dims toDims(const std::vector& vec) -{ - int limit = static_cast(nvinfer1::Dims::MAX_DIMS); - if (static_cast(vec.size()) > limit) - { - sample::gLogWarning << "Vector too long, only first 8 elements are used in dimension." 
<< std::endl; - } - // Pick first nvinfer1::Dims::MAX_DIMS elements - nvinfer1::Dims dims{std::min(static_cast(vec.size()), limit), {}}; - std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d)); - return dims; -} +nvinfer1::Dims toDims(std::vector const& vec); -template -inline void fillBuffer(void* buffer, int64_t volume, T min, T max) -{ - T* typedBuffer = static_cast(buffer); - std::default_random_engine engine; - if (std::is_integral::value) - { - std::uniform_int_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; - std::generate(typedBuffer, typedBuffer + volume, generator); - } - else - { - std::uniform_real_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; - std::generate(typedBuffer, typedBuffer + volume, generator); - } -} +template ::value, bool>::type = true> +void fillBuffer(void* buffer, int64_t volume, T min, T max); -// Specialization needed for custom type __half -template -inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) -{ - H* typedBuffer = static_cast(buffer); - std::default_random_engine engine; - std::uniform_real_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; - std::generate(typedBuffer, typedBuffer + volume, generator); -} -template <> -inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max) -{ - fillBufferHalf(buffer, volume, min, max); -} +template ::value, int32_t>::type = 0> +void fillBuffer(void* buffer, int64_t volume, T min, T max); template -inline void dumpBuffer(const void* buffer, const std::string& separator, std::ostream& os, const nvinfer1::Dims& dims, - const nvinfer1::Dims& strides, int32_t vectorDim, int32_t spv) -{ - const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies()); - const T* typedBuffer = static_cast(buffer); - std::string sep; - for (int64_t v = 0; v < volume; ++v) - { - int64_t curV = v; - int32_t dataOffset = 0; - for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) - { - int32_t dimVal = curV % dims.d[dimIndex]; - if (dimIndex == vectorDim) - { - dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv; - } - else - { - dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 
1 : spv); - } - curV /= dims.d[dimIndex]; - ASSERT(curV >= 0); - } - - os << sep << typedBuffer[dataOffset]; - sep = separator; - } -} - -inline void loadFromFile(std::string const& fileName, char* dst, size_t size) -{ - ASSERT(dst); - - std::ifstream file(fileName, std::ios::in | std::ios::binary); - if (file.is_open()) - { - file.read(dst, size); - file.close(); - } - else - { - std::stringstream msg; - msg << "Cannot open file " << fileName << "!"; - throw std::invalid_argument(msg.str()); - } -} - -struct Binding -{ - bool isInput{false}; - std::unique_ptr buffer; - int64_t volume{0}; - nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT}; - - void fill(const std::string& fileName) - { - loadFromFile(fileName, static_cast(buffer->getHostBuffer()), buffer->getSize()); - } - - void fill() - { - switch (dataType) - { - case nvinfer1::DataType::kBOOL: - { - fillBuffer(buffer->getHostBuffer(), volume, 0, 1); - break; - } - case nvinfer1::DataType::kINT32: - { - fillBuffer(buffer->getHostBuffer(), volume, -128, 127); - break; - } - case nvinfer1::DataType::kINT8: - { - fillBuffer(buffer->getHostBuffer(), volume, -128, 127); - break; - } - case nvinfer1::DataType::kFLOAT: - { - fillBuffer(buffer->getHostBuffer(), volume, -1.0F, 1.0F); - break; - } - case nvinfer1::DataType::kHALF: - { - fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F); - break; - } - } - } - - void dump(std::ostream& os, nvinfer1::Dims dims, nvinfer1::Dims strides, int32_t vectorDim, int32_t spv, - const std::string separator = " ") const - { - switch (dataType) - { - case nvinfer1::DataType::kBOOL: - { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - case nvinfer1::DataType::kINT32: - { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - case nvinfer1::DataType::kINT8: - { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - case nvinfer1::DataType::kFLOAT: - { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - case nvinfer1::DataType::kHALF: - { - dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - } - } -}; - -class Bindings -{ -public: - Bindings() = delete; - explicit Bindings(bool useManaged) - : mUseManaged(useManaged) - { - } - - void addBinding(int b, const std::string& name, bool isInput, int64_t volume, nvinfer1::DataType dataType, - const std::string& fileName = "") - { - while (mBindings.size() <= static_cast(b)) - { - mBindings.emplace_back(); - mDevicePointers.emplace_back(); - } - mNames[name] = b; - if (mBindings[b].buffer == nullptr) - { - if (mUseManaged) - mBindings[b].buffer.reset(new UnifiedMirroredBuffer); - else - mBindings[b].buffer.reset(new DiscreteMirroredBuffer); - } - mBindings[b].isInput = isInput; - // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr - // even for empty tensors, so allocate a dummy byte. 
- if (volume == 0) - mBindings[b].buffer->allocate(1); - else - mBindings[b].buffer->allocate(static_cast(volume) * static_cast(dataTypeSize(dataType))); - - mBindings[b].volume = volume; - mBindings[b].dataType = dataType; - mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer(); - if (isInput) - { - if (fileName.empty()) - fill(b); - else - fill(b, fileName); - } - } - - void** getDeviceBuffers() - { - return mDevicePointers.data(); - } - - void transferInputToDevice(TrtCudaStream& stream) - { - for (auto& b : mNames) - { - if (mBindings[b.second].isInput) - mBindings[b.second].buffer->hostToDevice(stream); - } - } - - void transferOutputToHost(TrtCudaStream& stream) - { - for (auto& b : mNames) - { - if (!mBindings[b.second].isInput) - mBindings[b.second].buffer->deviceToHost(stream); - } - } - - void fill(int binding, const std::string& fileName) - { - mBindings[binding].fill(fileName); - } - - void fill(int binding) - { - mBindings[binding].fill(); - } +void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, nvinfer1::Dims const& dims, + nvinfer1::Dims const& strides, int32_t vectorDim, int32_t spv); - void dumpBindingDimensions(int binding, const nvinfer1::IExecutionContext& context, std::ostream& os) const - { - const auto dims = context.getBindingDimensions(binding); - // Do not add a newline terminator, because the caller may be outputting a JSON string. - os << dims; - } - - void dumpBindingValues(const nvinfer1::IExecutionContext& context, int binding, std::ostream& os, - const std::string& separator = " ", int32_t batch = 1) const - { - nvinfer1::Dims dims = context.getBindingDimensions(binding); - nvinfer1::Dims strides = context.getStrides(binding); - int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding); - const int32_t spv = context.getEngine().getBindingComponentsPerElement(binding); +void loadFromFile(std::string const& fileName, char* dst, size_t size); - if (context.getEngine().hasImplicitBatchDimension()) - { - auto insertN = [](nvinfer1::Dims& d, int32_t bs) { - const int32_t nbDims = d.nbDims; - ASSERT(nbDims < nvinfer1::Dims::MAX_DIMS); - std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]); - d.d[0] = bs; - d.nbDims = nbDims + 1; - }; - int32_t batchStride = 0; - for (int32_t i = 0; i < strides.nbDims; ++i) - { - if (strides.d[i] * dims.d[i] > batchStride) - { - batchStride = strides.d[i] * dims.d[i]; - } - } - insertN(dims, batch); - insertN(strides, batchStride); - vectorDim = (vectorDim == -1) ? 
-1 : vectorDim + 1; - } - - mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator); - } - - void dumpInputs(const nvinfer1::IExecutionContext& context, std::ostream& os) const - { - auto isInput = [](const Binding& b) { return b.isInput; }; - dumpBindings(context, isInput, os); - } +std::vector splitToStringVec(std::string const& option, char separator, int64_t maxSplit = -1); - void dumpOutputs(const nvinfer1::IExecutionContext& context, std::ostream& os) const - { - auto isOutput = [](const Binding& b) { return !b.isInput; }; - dumpBindings(context, isOutput, os); - } +bool broadcastIOFormats(std::vector const& formats, size_t nbBindings, bool isInput = true); - void dumpBindings(const nvinfer1::IExecutionContext& context, std::ostream& os) const - { - auto all = [](const Binding& /*b*/) { return true; }; - dumpBindings(context, all, os); - } +int32_t getCudaDriverVersion(); - void dumpBindings( - const nvinfer1::IExecutionContext& context, bool (*predicate)(const Binding& b), std::ostream& os) const - { - for (const auto& n : mNames) - { - const auto binding = n.second; - if (predicate(mBindings[binding])) - { - os << n.first << ": ("; - dumpBindingDimensions(binding, context, os); - os << ")" << std::endl; +int32_t getCudaRuntimeVersion(); - dumpBindingValues(context, binding, os); - os << std::endl; - } - } - } +void sparsify(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights); +void sparsify(nvinfer1::Weights const& weights, int32_t k, int32_t rs, std::vector& sparseWeights); - std::unordered_map getInputBindings() const - { - auto isInput = [](const Binding& b) { return b.isInput; }; - return getBindings(isInput); - } - - std::unordered_map getOutputBindings() const - { - auto isOutput = [](const Binding& b) { return !b.isInput; }; - return getBindings(isOutput); - } - - std::unordered_map getBindings() const - { - auto all = [](const Binding& /*b*/) { return true; }; - return getBindings(all); - } +// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0. +template +void sparsify(T const* values, int64_t count, int32_t k, int32_t rs, std::vector& sparseWeights); - std::unordered_map getBindings(bool (*predicate)(const Binding& b)) const - { - std::unordered_map bindings; - for (const auto& n : mNames) - { - const auto binding = n.second; - if (predicate(mBindings[binding])) - bindings.insert(n); - } - return bindings; - } +template +void setSparseWeights(L& l, int32_t k, int32_t rs, std::vector& sparseWeights); -private: - std::unordered_map mNames; - std::vector mBindings; - std::vector mDevicePointers; - bool mUseManaged{false}; -}; +// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle layers. +// Forward analysis on the API graph to determine which weights to sparsify. +void sparsifyMatMulKernelWeights( + nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights); template -struct TrtDestroyer -{ - void operator()(T* t) - { - //t->destroy(); - delete t; - } -}; +void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n); -template -using TrtUniquePtr = std::unique_ptr>; +//! A helper function to match a target string with a pattern where the pattern can contain up to one wildcard ('*') +//! character that matches to any strings. 
+bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target); -inline bool broadcastIOFormats(const std::vector& formats, size_t nbBindings, bool isInput = true) -{ - bool broadcast = formats.size() == 1; - bool validFormatsCount = broadcast || (formats.size() == nbBindings); - if (!formats.empty() && !validFormatsCount) - { - if (isInput) - { - throw std::invalid_argument( - "The number of inputIOFormats must match network's inputs or be one for broadcasting."); - } - else - { - throw std::invalid_argument( - "The number of outputIOFormats must match network's outputs or be one for broadcasting."); - } - } - return broadcast; -} - -inline std::vector loadTimingCacheFile(const std::string inFileName) -{ - std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); - if (!iFile) - { - sample::gLogWarning << "Could not read timing cache from: " << inFileName - << ". A new timing cache will be generated and written." << std::endl; - return std::vector(); - } - iFile.seekg(0, std::ifstream::end); - size_t fsize = iFile.tellg(); - iFile.seekg(0, std::ifstream::beg); - std::vector content(fsize); - iFile.read(content.data(), fsize); - iFile.close(); - sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from " << inFileName << std::endl; - return content; -} - -inline void saveTimingCacheFile(const std::string outFileName, const nvinfer1::IHostMemory* blob) +//! A helper method to find an item from an unordered_map. If the exact match exists, this is identical to +//! map.find(target). If the exact match does not exist, it returns the first plausible match, taking up to one wildcard +//! into account. If there is no plausible match, then it returns map.end(). +template +typename std::unordered_map::const_iterator findPlausible( + std::unordered_map const& map, std::string const& target) { - std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); - if (!oFile) + auto res = map.find(target); + if (res == map.end()) { - sample::gLogWarning << "Could not write timing cache to: " << outFileName << std::endl; - return; + res = std::find_if( + map.begin(), map.end(), [&](typename std::unordered_map::value_type const& item) { + return matchStringWithOneWildcard(item.first, target); + }); } - oFile.write((char*) blob->data(), blob->size()); - oFile.close(); - sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to " << outFileName << std::endl; -} - -inline int32_t getCudaDriverVersion() -{ - int32_t version{-1}; - cudaCheck(cudaDriverGetVersion(&version)); - return version; -} - -inline int32_t getCudaRuntimeVersion() -{ - int32_t version{-1}; - cudaCheck(cudaRuntimeGetVersion(&version)); - return version; + return res; } } // namespace sample diff --git a/src/Detector/tensorrt_yolo/common/streamReader.h b/src/Detector/tensorrt_yolo/common/streamReader.h new file mode 100644 index 00000000..7d4aa1c6 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/streamReader.h @@ -0,0 +1,78 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
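`matchStringWithOneWildcard` together with `findPlausible` lets per-tensor options keyed by tensor name be written with a single `*` wildcard. The sketch below restates the prefix-plus-anything-plus-suffix rule in standalone form; it is not the library function itself and glosses over edge cases such as overlapping prefix and suffix:

```cpp
#include <iostream>
#include <string>

// One-wildcard match: the target must start with the text before '*' and end with the text after it.
bool matchOneWildcard(std::string const& pattern, std::string const& target)
{
    auto const star = pattern.find('*');
    if (star == std::string::npos)
    {
        return pattern == target; // no wildcard: exact match only
    }
    std::string const prefix = pattern.substr(0, star);
    std::string const suffix = pattern.substr(star + 1);
    return target.size() >= prefix.size() + suffix.size()
        && target.compare(0, prefix.size(), prefix) == 0
        && target.compare(target.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main()
{
    std::cout << matchOneWildcard("model/conv*_relu", "model/conv3_relu") << '\n'; // 1
    std::cout << matchOneWildcard("model/conv*_relu", "model/fc1") << '\n';        // 0
    return 0;
}
```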
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef STREAM_READER_H +#define STREAM_READER_H + +#include "NvInferRuntime.h" +#include "sampleUtils.h" +#include + +namespace samplesCommon +{ + +//! Implements the TensorRT IStreamReader to allow deserializing an engine directly from the plan file. +class FileStreamReader final : public nvinfer1::IStreamReader +{ +public: + bool open(std::string filepath) + { + mFile.open(filepath, std::ios::binary); + return mFile.is_open(); + } + + void close() + { + if (mFile.is_open()) + { + mFile.close(); + } + } + + ~FileStreamReader() final + { + close(); + } + + int64_t read(void* dest, int64_t bytes) final + { + if (!mFile.good()) + { + return -1; + } + mFile.read(static_cast(dest), bytes); + return mFile.gcount(); + } + + void reset() + { + assert(mFile.good()); + mFile.seekg(0); + } + + bool isOpen() const + { + return mFile.is_open(); + } + +private: + std::ifstream mFile; +}; + +} // namespace samplesCommon + +#endif // STREAM_READER_H diff --git a/src/Detector/tensorrt_yolo/common/timingCache.cpp b/src/Detector/tensorrt_yolo/common/timingCache.cpp new file mode 100644 index 00000000..18e85ba4 --- /dev/null +++ b/src/Detector/tensorrt_yolo/common/timingCache.cpp @@ -0,0 +1,157 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "timingCache.h" +#include "NvInfer.h" +#include "fileLock.h" +#include "sampleUtils.h" +#include +#include +#include +#include +#include +#include +using namespace nvinfer1; +namespace nvinfer1 +{ +namespace utils +{ +std::vector loadTimingCacheFile(ILogger& logger, std::string const& inFileName) +{ + try + { + std::unique_ptr fileLock{new FileLock(logger, inFileName)}; + std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); + if (!iFile) + { + std::stringstream ss; + ss << "Could not read timing cache from: " << inFileName + << ". 
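`FileStreamReader` implements `nvinfer1::IStreamReader`, which recent TensorRT releases can consume to deserialize an engine straight from the plan file instead of buffering the whole file in host memory first. A hedged usage sketch: it assumes the `IRuntime::deserializeCudaEngine(IStreamReader&)` overload available in TensorRT 10, and the plan-file path is whatever the caller provides:

```cpp
#include <string>

#include "NvInferRuntime.h"
#include "streamReader.h"

// Sketch only: stream an engine plan from disk into deserializeCudaEngine().
nvinfer1::ICudaEngine* loadEngineFromPlan(nvinfer1::IRuntime& runtime, std::string const& planFile)
{
    samplesCommon::FileStreamReader reader;
    if (!reader.open(planFile))
    {
        return nullptr; // file could not be opened
    }
    return runtime.deserializeCudaEngine(reader);
}
```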
A new timing cache will be generated and written."; + logger.log(ILogger::Severity::kWARNING, ss.str().c_str()); + return std::vector(); + } + iFile.seekg(0, std::ifstream::end); + size_t fsize = iFile.tellg(); + iFile.seekg(0, std::ifstream::beg); + std::vector content(fsize); + iFile.read(content.data(), fsize); + iFile.close(); + std::stringstream ss; + ss << "Loaded " << fsize << " bytes of timing cache from " << inFileName; + logger.log(ILogger::Severity::kINFO, ss.str().c_str()); + return content; + } + catch (std::exception const& e) + { + std::cerr << "Exception detected: " << e.what() << std::endl; + } + return {}; +} + +std::unique_ptr buildTimingCacheFromFile( + ILogger& logger, IBuilderConfig& config, std::string const& timingCacheFile, std::ostream& err) +{ + std::unique_ptr timingCache{}; + auto timingCacheContents = loadTimingCacheFile(logger, timingCacheFile); + timingCache.reset(config.createTimingCache(timingCacheContents.data(), timingCacheContents.size())); + SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed", nullptr, err); + config.clearFlag(BuilderFlag::kDISABLE_TIMING_CACHE); + SMP_RETVAL_IF_FALSE( + config.setTimingCache(*timingCache, true), "IBuilderConfig setTimingCache failed", nullptr, err); + return timingCache; +} + +void saveTimingCacheFile(ILogger& logger, std::string const& outFileName, IHostMemory const* blob) +{ + try + { + std::unique_ptr fileLock{new FileLock(logger, outFileName)}; + std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); + if (!oFile) + { + std::stringstream ss; + ss << "Could not write timing cache to: " << outFileName; + logger.log(ILogger::Severity::kWARNING, ss.str().c_str()); + return; + } + oFile.write(reinterpret_cast(blob->data()), blob->size()); + oFile.close(); + std::stringstream ss; + ss << "Saved " << blob->size() << " bytes of timing cache to " << outFileName; + logger.log(ILogger::Severity::kINFO, ss.str().c_str()); + } + catch (std::exception const& e) + { + std::cerr << "Exception detected: " << e.what() << std::endl; + } +} + +void updateTimingCacheFile(nvinfer1::ILogger& logger, std::string const& fileName, + nvinfer1::ITimingCache const* timingCache, nvinfer1::IBuilder& builder) +{ + try + { + // Prepare empty timingCache in case that there is no existing file to read + std::unique_ptr config{builder.createBuilderConfig()}; + std::unique_ptr fileTimingCache{config->createTimingCache(static_cast(nullptr), 0)}; + + std::unique_ptr fileLock{new FileLock(logger, fileName)}; + std::ifstream iFile(fileName, std::ios::in | std::ios::binary); + if (iFile) + { + iFile.seekg(0, std::ifstream::end); + size_t fsize = iFile.tellg(); + iFile.seekg(0, std::ifstream::beg); + std::vector content(fsize); + iFile.read(content.data(), fsize); + iFile.close(); + std::stringstream ss; + ss << "Loaded " << fsize << " bytes of timing cache from " << fileName; + logger.log(ILogger::Severity::kINFO, ss.str().c_str()); + fileTimingCache.reset(config->createTimingCache(static_cast(content.data()), content.size())); + if (!fileTimingCache) + { + throw std::runtime_error("Failed to create timingCache from " + fileName + "!"); + } + } + fileTimingCache->combine(*timingCache, false); + std::unique_ptr blob{fileTimingCache->serialize()}; + if (!blob) + { + throw std::runtime_error("Failed to serialize ITimingCache!"); + } + std::ofstream oFile(fileName, std::ios::out | std::ios::binary); + if (!oFile) + { + std::stringstream ss; + ss << "Could not write timing cache to: " << fileName; + 
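The `timingCache` helpers wrap the load, attach, and save cycle of `ITimingCache`, with a `FileLock` so concurrent builds do not clobber each other's cache file. A hedged sketch of how a build might use them, following the declarations above; the logger and cache-file path are whatever the caller supplies:

```cpp
#include <iostream>
#include <memory>
#include <string>

#include "NvInfer.h"
#include "timingCache.h"

// Sketch only: attach a (possibly empty) timing cache before building, then merge
// the timings gathered during this build back into the cache file.
std::unique_ptr<nvinfer1::IHostMemory> buildWithTimingCache(nvinfer1::IBuilder& builder,
    nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger, std::string const& cacheFile)
{
    std::unique_ptr<nvinfer1::IBuilderConfig> config{builder.createBuilderConfig()};
    auto timingCache = nvinfer1::utils::buildTimingCacheFromFile(logger, *config, cacheFile, std::cerr);

    std::unique_ptr<nvinfer1::IHostMemory> plan{builder.buildSerializedNetwork(network, *config)};
    if (plan && timingCache)
    {
        // Combine the new timings with whatever is already on disk.
        nvinfer1::utils::updateTimingCacheFile(logger, cacheFile, timingCache.get(), builder);
    }
    return plan;
}
```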
+            logger.log(ILogger::Severity::kWARNING, ss.str().c_str());
+            return;
+        }
+        oFile.write(reinterpret_cast<char const*>(blob->data()), blob->size());
+        oFile.close();
+        std::stringstream ss;
+        ss << "Saved " << blob->size() << " bytes of timing cache to " << fileName;
+        logger.log(ILogger::Severity::kINFO, ss.str().c_str());
+    }
+    catch (std::exception const& e)
+    {
+        std::cerr << "Exception detected: " << e.what() << std::endl;
+    }
+}
+} // namespace utils
+} // namespace nvinfer1
diff --git a/src/Detector/tensorrt_yolo/common/timingCache.h b/src/Detector/tensorrt_yolo/common/timingCache.h
new file mode 100644
index 00000000..c4c76e37
--- /dev/null
+++ b/src/Detector/tensorrt_yolo/common/timingCache.h
@@ -0,0 +1,38 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TENSORRT_SAMPLES_COMMON_TIMINGCACHE_H_
+#define TENSORRT_SAMPLES_COMMON_TIMINGCACHE_H_
+#include "NvInfer.h"
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace nvinfer1
+{
+namespace utils
+{
+std::vector<char> loadTimingCacheFile(nvinfer1::ILogger& logger, std::string const& inFileName);
+std::unique_ptr<nvinfer1::ITimingCache> buildTimingCacheFromFile(
+    ILogger& logger, IBuilderConfig& config, std::string const& timingCacheFile, std::ostream& err);
+void saveTimingCacheFile(nvinfer1::ILogger& logger, std::string const& outFileName, nvinfer1::IHostMemory const* blob);
+void updateTimingCacheFile(nvinfer1::ILogger& logger, std::string const& fileName,
+    nvinfer1::ITimingCache const* timingCache, nvinfer1::IBuilder& builder);
+} // namespace utils
+} // namespace nvinfer1
+
+#endif // TENSORRT_SAMPLES_COMMON_TIMINGCACHE_H_
diff --git a/src/Detector/tensorrt_yolo/ds_image.cpp b/src/Detector/tensorrt_yolo/ds_image.cpp
index b801b874..77404f97 100644
--- a/src/Detector/tensorrt_yolo/ds_image.cpp
+++ b/src/Detector/tensorrt_yolo/ds_image.cpp
@@ -50,7 +50,8 @@ DsImage::DsImage(const cv::Mat& mat_image_, tensor_rt::ModelType net_type, const
     if (tensor_rt::ModelType::YOLOV5 == net_type || tensor_rt::ModelType::YOLOV6 == net_type ||
         tensor_rt::ModelType::YOLOV7 == net_type || tensor_rt::ModelType::YOLOV7Mask == net_type ||
         tensor_rt::ModelType::YOLOV8 == net_type || tensor_rt::ModelType::YOLOV8_OBB == net_type || tensor_rt::ModelType::YOLOV8Mask == net_type ||
-        tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type)
+        tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type ||
+        tensor_rt::ModelType::YOLOV11 == net_type || tensor_rt::ModelType::YOLOV11_OBB == net_type || tensor_rt::ModelType::YOLOV11Mask == net_type)
     {
         // resize the DsImage with scale
         float r = std::min(static_cast<float>(inputH) / static_cast<float>(m_Height), static_cast<float>(inputW) / static_cast<float>(m_Width));
@@ -101,7 +102,8 @@ DsImage::DsImage(const std::string& path, tensor_rt::ModelType net_type, const i
     if (tensor_rt::ModelType::YOLOV5 == net_type || tensor_rt::ModelType::YOLOV6 == net_type ||
         tensor_rt::ModelType::YOLOV7 == net_type || tensor_rt::ModelType::YOLOV7Mask == net_type ||
         tensor_rt::ModelType::YOLOV8 == net_type || tensor_rt::ModelType::YOLOV8_OBB == net_type || tensor_rt::ModelType::YOLOV8Mask == net_type ||
-        tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type)
+        tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type ||
+        tensor_rt::ModelType::YOLOV11 == net_type || tensor_rt::ModelType::YOLOV11_OBB == net_type || tensor_rt::ModelType::YOLOV11Mask == net_type)
     {
         // resize the DsImage with scale
         float dim = std::max(m_Height, m_Width);
diff --git a/src/Detector/tensorrt_yolo/yolo.cpp b/src/Detector/tensorrt_yolo/yolo.cpp
index a60d3dc4..4ee202b6 100644
--- a/src/Detector/tensorrt_yolo/yolo.cpp
+++ b/src/Detector/tensorrt_yolo/yolo.cpp
@@ -78,7 +78,31 @@ Yolo::Yolo(const NetworkInfo& networkInfo, const InferParams& inferParams)
     assert(m_Engine != nullptr);
     m_Context = m_Engine->createExecutionContext();
     assert(m_Context != nullptr);
+
+    auto numBindings = m_Engine->getNbIOTensors();
+    //std::cout << "** Bindings: " << numBindings << " **" << std::endl;
+    for (int32_t i = 0; i < numBindings; ++i)
+    {
+        std::string bindName = m_Engine->getIOTensorName(i);
+        m_tensorNames.emplace(bindName, i);
+        nvinfer1::Dims dim = m_Engine->getTensorShape(bindName.c_str());
+
+        std::cout << i << ": name: " << bindName;
+        std::cout << ", size: ";
+        for (int j = 0; j < dim.nbDims; ++j)
+        {
+            std::cout << dim.d[j];
+            if (j < dim.nbDims - 1)
+                std::cout << "x";
+        }
+        std::cout << std::endl;
+
+        if (m_InputBlobName == bindName)
+            m_InputBindingIndex = i;
+    }
+#if (NV_TENSORRT_MAJOR < 9)
     m_InputBindingIndex = m_Engine->getBindingIndex(m_InputBlobName.c_str());
+#endif
     assert(m_InputBindingIndex != -1);
     assert(m_BatchSize <= static_cast<uint32_t>(m_Engine->getMaxBatchSize()));
     allocateBuffers();
@@ -464,7 +488,14 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr
     // Build the engine
     std::cout << "Building the TensorRT Engine..." << std::endl;
-    m_Engine = m_Builder->buildEngineWithConfig(*m_Network, *config);
+#if (NV_TENSORRT_MAJOR < 9)
+    m_Engine = m_Builder->buildEngineWithConfig(*m_Network, *config);
+#else
+    nvinfer1::IRuntime* inferRuntime = nvinfer1::createInferRuntime(m_Logger);
+    nvinfer1::IHostMemory* serialNetwork = m_Builder->buildSerializedNetwork(*m_Network, *config);
+    m_Engine = inferRuntime->deserializeCudaEngine(serialNetwork->data(), serialNetwork->size());
+    delete inferRuntime;
+#endif
     assert(m_Engine != nullptr);
     std::cout << "Building complete!" << std::endl;
@@ -942,7 +973,15 @@ void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, Int8EntropyCa
 #endif
     // Build the engine
     std::cout << "Building the TensorRT Engine..." << std::endl;
+#if (NV_TENSORRT_MAJOR < 9)
     m_Engine = m_Builder->buildEngineWithConfig(*m_Network, *config);
+#else
+    nvinfer1::IRuntime* inferRuntime = nvinfer1::createInferRuntime(m_Logger);
+    nvinfer1::IHostMemory* serialNetwork = m_Builder->buildSerializedNetwork(*m_Network, *config);
+    m_Engine = inferRuntime->deserializeCudaEngine(serialNetwork->data(), serialNetwork->size());
+    delete inferRuntime;
+#endif
+
     assert(m_Engine != nullptr);
     std::cout << "Building complete!"
               << std::endl;
@@ -987,7 +1026,8 @@ void Yolo::doInference(const unsigned char* input, const uint32_t batchSize)
                                   batchSize * m_InputSize * sizeof(float),
                                   cudaMemcpyHostToDevice, m_CudaStream));
-    m_Context->enqueue(batchSize, m_DeviceBuffers.data(), m_CudaStream, nullptr);
+    //m_Context->enqueueV3(batchSize, m_DeviceBuffers.data(), m_CudaStream, nullptr);
+    m_Context->enqueueV3(m_CudaStream);
     for (auto& tensor : m_OutputTensors)
     {
         NV_CUDA_CHECK(cudaMemcpyAsync(tensor.hostBuffer, m_DeviceBuffers.at(tensor.bindingIndex),
@@ -1249,8 +1289,7 @@ void Yolo::parse_cfg_blocks_v5(const std::vector<std::map<std::string, std::str
-    m_DeviceBuffers.resize(m_Engine->getNbBindings(), nullptr);
+    m_DeviceBuffers.resize(m_Engine->getNbIOTensors(), nullptr);
     assert(m_InputBindingIndex != -1 && "Invalid input binding index");
-    NV_CUDA_CHECK(cudaMalloc(&m_DeviceBuffers.at(m_InputBindingIndex),
-                             m_BatchSize * m_InputSize * sizeof(float)));
+    NV_CUDA_CHECK(cudaMalloc(&m_DeviceBuffers.at(m_InputBindingIndex), m_BatchSize * m_InputSize * sizeof(float)));
     for (auto& tensor : m_OutputTensors)
     {
+#if (NV_TENSORRT_MAJOR < 9)
         tensor.bindingIndex = m_Engine->getBindingIndex(tensor.blobName.c_str());
+#else
+        auto it = m_tensorNames.find(tensor.blobName);
+        tensor.bindingIndex = (it != std::end(m_tensorNames)) ? it->second : -1;
+#endif
         assert((tensor.bindingIndex != -1) && "Invalid output binding index");
-        NV_CUDA_CHECK(cudaMalloc(&m_DeviceBuffers.at(tensor.bindingIndex),
-                                 m_BatchSize * tensor.volume * sizeof(float)));
-        NV_CUDA_CHECK(
-            cudaMallocHost(&tensor.hostBuffer, tensor.volume * m_BatchSize * sizeof(float)));
+        NV_CUDA_CHECK(cudaMalloc(&m_DeviceBuffers.at(tensor.bindingIndex), m_BatchSize * tensor.volume * sizeof(float)));
+        NV_CUDA_CHECK(cudaMallocHost((void**)&tensor.hostBuffer, tensor.volume * m_BatchSize * sizeof(float)));
     }
 }
diff --git a/src/Detector/tensorrt_yolo/yolo.h b/src/Detector/tensorrt_yolo/yolo.h
index be347d19..4cfdba16 100644
--- a/src/Detector/tensorrt_yolo/yolo.h
+++ b/src/Detector/tensorrt_yolo/yolo.h
@@ -158,6 +158,7 @@ class Yolo
     std::vector<void*> m_DeviceBuffers;
     int m_InputBindingIndex = -1;
     cudaStream_t m_CudaStream = nullptr;
+    std::map<std::string, int32_t> m_tensorNames;
 
     virtual std::vector<BBoxInfo> decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor) = 0;
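
The `FileStreamReader` added above implements `nvinfer1::IStreamReader`, so a serialized plan can be streamed straight into the runtime instead of being read into a temporary host buffer first. Below is a minimal sketch, assuming the TensorRT 10 `IRuntime::deserializeCudaEngine(IStreamReader&)` overload; `loadEngine()` and the plan path are illustrative names, not part of the patch.

```cpp
// Sketch: deserialize an engine through samplesCommon::FileStreamReader (TensorRT 10).
#include "streamReader.h"
#include "NvInferRuntime.h"
#include <iostream>
#include <string>

nvinfer1::ICudaEngine* loadEngine(nvinfer1::IRuntime& runtime, const std::string& planPath)
{
    samplesCommon::FileStreamReader reader;
    if (!reader.open(planPath))
    {
        std::cerr << "Could not open " << planPath << std::endl;
        return nullptr;
    }
    // The runtime pulls bytes through FileStreamReader::read() on demand,
    // so the whole plan never has to sit in a std::vector on the host.
    return runtime.deserializeCudaEngine(reader);
}
```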
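With TensorRT 10 the binding-index API is gone: `enqueueV3()` takes only the CUDA stream, and every I/O tensor must be bound by name with `setTensorAddress()` before the call. The hunk above only shows the switch to `enqueueV3()`; the sketch below illustrates the name-based binding it relies on, with a hypothetical `runInference()` helper standing in for the real `Yolo::doInference()`.

```cpp
// Sketch: name-based I/O binding required by enqueueV3() in TensorRT 10.
#include "NvInfer.h"
#include <cuda_runtime_api.h>
#include <map>
#include <string>

bool runInference(nvinfer1::ICudaEngine& engine, nvinfer1::IExecutionContext& context,
                  const std::map<std::string, void*>& deviceBuffers, cudaStream_t stream)
{
    // enqueueV3() takes no buffer array: each I/O tensor needs its device
    // address registered on the execution context before the call.
    for (int32_t i = 0; i < engine.getNbIOTensors(); ++i)
    {
        const char* name = engine.getIOTensorName(i);
        auto it = deviceBuffers.find(name);
        if (it == deviceBuffers.end() || !context.setTensorAddress(name, it->second))
            return false;
    }
    return context.enqueueV3(stream);
}
```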
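`timingCache.cpp` and `timingCache.h` add the usual TensorRT sample helpers for persisting builder tactic timings between runs, which shortens repeated engine builds. The hunks above do not show where they are called from, so the following is only a sketch of how they could wrap the `buildSerializedNetwork()` path; `timing.cache` and `buildWithTimingCache()` are placeholder names.

```cpp
// Sketch: seeding and updating a timing cache around an engine build.
#include "timingCache.h"
#include "NvInfer.h"
#include <iostream>
#include <string>

nvinfer1::IHostMemory* buildWithTimingCache(nvinfer1::IBuilder& builder,
    nvinfer1::INetworkDefinition& network, nvinfer1::IBuilderConfig& config,
    nvinfer1::ILogger& logger)
{
    const std::string cacheFile = "timing.cache";
    // Seed the builder config with any previously recorded tactic timings.
    auto cache = nvinfer1::utils::buildTimingCacheFromFile(logger, config, cacheFile, std::cerr);
    nvinfer1::IHostMemory* plan = builder.buildSerializedNetwork(network, config);
    if (plan && cache)
    {
        // Merge the timings measured during this build back into the file.
        nvinfer1::utils::updateTimingCacheFile(logger, cacheFile, cache.get(), builder);
    }
    return plan;
}
```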