From b3bcfb36de6307f97027082e402bc53a37961055 Mon Sep 17 00:00:00 2001 From: Nuzhny007 Date: Thu, 3 Oct 2024 00:05:29 +0300 Subject: [PATCH] TensorRT 10 is supported, YOLOv11, YOLOv11-obb and YOLOv11-seg detector worked with TensorRT --- README.md | 4 + data/settings_yolov11.ini | 142 + data/settings_yolov11_obb.ini | 142 + data/settings_yolov11_seg.ini | 142 + example/examples.h | 5 +- src/Detector/OCVDNNDetector.cpp | 7 +- src/Detector/OCVDNNDetector.h | 5 +- src/Detector/YoloTensorRTDetector.cpp | 5 +- src/Detector/tensorrt_yolo/CMakeLists.txt | 13 +- src/Detector/tensorrt_yolo/YoloONNX.cpp | 6 +- src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp | 111 + .../tensorrt_yolo/YoloONNXv11_instance.hpp | 301 ++ .../tensorrt_yolo/YoloONNXv11_obb.hpp | 124 + src/Detector/tensorrt_yolo/class_detector.cpp | 23 +- src/Detector/tensorrt_yolo/class_detector.h | 5 +- .../tensorrt_yolo/cmake/FindTensorRT.cmake | 143 +- .../{sampleEngines.cpp_ => sampleEngines.cpp} | 6 +- ...mpleInference.cpp_ => sampleInference.cpp} | 0 .../common_deprecated/BatchStream.h | 388 -- .../common_deprecated/EntropyCalibrator.h | 134 - .../common_deprecated/ErrorRecorder.h | 137 - .../tensorrt_yolo/common_deprecated/buffers.h | 478 -- .../tensorrt_yolo/common_deprecated/common.h | 963 ---- .../tensorrt_yolo/common_deprecated/half.h | 4302 ----------------- .../common_deprecated/logger.cpp | 40 - .../tensorrt_yolo/common_deprecated/logger.h | 36 - .../tensorrt_yolo/common_deprecated/logging.h | 578 --- .../common_deprecated/parserOnnxConfig.h | 153 - .../common_deprecated/safeCommon.h | 71 - .../common_deprecated/sampleConfig.h | 337 -- .../common_deprecated/sampleDevice.h | 494 -- .../common_deprecated/sampleEngines.cpp | 1629 ------- .../common_deprecated/sampleEngines.h | 183 - .../common_deprecated/sampleInference.cpp | 990 ---- .../common_deprecated/sampleInference.h | 92 - .../common_deprecated/sampleOptions.cpp | 1778 ------- .../common_deprecated/sampleOptions.h | 355 -- .../common_deprecated/sampleReporting.cpp | 445 -- .../common_deprecated/sampleReporting.h | 222 - .../common_deprecated/sampleUtils.h | 543 --- src/Detector/tensorrt_yolo/ds_image.cpp | 6 +- 41 files changed, 1121 insertions(+), 14417 deletions(-) create mode 100644 data/settings_yolov11.ini create mode 100644 data/settings_yolov11_obb.ini create mode 100644 data/settings_yolov11_seg.ini create mode 100644 src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp create mode 100644 src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp create mode 100644 src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp rename src/Detector/tensorrt_yolo/common/{sampleEngines.cpp_ => sampleEngines.cpp} (99%) rename src/Detector/tensorrt_yolo/common/{sampleInference.cpp_ => sampleInference.cpp} (100%) delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/BatchStream.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/EntropyCalibrator.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/ErrorRecorder.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/buffers.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/common.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/half.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/logger.cpp delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/logger.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/logging.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/parserOnnxConfig.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/safeCommon.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleConfig.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleDevice.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleEngines.cpp delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleEngines.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleInference.cpp delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleInference.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleOptions.cpp delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleOptions.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleReporting.cpp delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleReporting.h delete mode 100644 src/Detector/tensorrt_yolo/common_deprecated/sampleUtils.h diff --git a/README.md b/README.md index 27b5fba26..a66543cab 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ # Last changes +* TensorRT 10 is supported + +* YOLOv11, YOLOv11-obb and YOLOv11-seg detector worked with TensorRT! Export pretrained Pytorch models [here (ultralytics/ultralytics)](https://github.com/ultralytics/ultralytics) to onnx format and run Multitarget-tracker with -e=6 example + * YOLOv8-obb detector worked with TensorRT! Export pretrained Pytorch models [here (ultralytics/ultralytics)](https://github.com/ultralytics/ultralytics) to onnx format and run Multitarget-tracker with -e=6 example * YOLOv10 detector worked with TensorRT! Export pretrained Pytorch models [here (THU-MIG/yolov10)](https://github.com/THU-MIG/yolov10) to onnx format and run Multitarget-tracker with -e=6 example diff --git a/data/settings_yolov11.ini b/data/settings_yolov11.ini new file mode 100644 index 000000000..c82412cd7 --- /dev/null +++ b/data/settings_yolov11.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 12 +# darknet_cudnn = 10 +# tensorrt = 11 +detector_backend = 12 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this frames then it will be removed +max_skip_frames = 50 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 50 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11_obb.ini b/data/settings_yolov11_obb.ini new file mode 100644 index 000000000..599e5dd59 --- /dev/null +++ b/data/settings_yolov11_obb.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 12 +# darknet_cudnn = 10 +# tensorrt = 11 +detector_backend = 12 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/DOTA.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11_OBB + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this frames then it will be removed +max_skip_frames = 50 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 50 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11_seg.ini b/data/settings_yolov11_seg.ini new file mode 100644 index 000000000..cb5c83eae --- /dev/null +++ b/data/settings_yolov11_seg.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 12 +# darknet_cudnn = 10 +# tensorrt = 11 +detector_backend = 12 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-seg.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-seg.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11Mask + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this frames then it will be removed +max_skip_frames = 50 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 50 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/example/examples.h b/example/examples.h index 1be76399d..08b0fc674 100644 --- a/example/examples.h +++ b/example/examples.h @@ -652,7 +652,10 @@ class YoloTensorRTExample final : public VideoExample YOLOV8_OBB, YOLOv8Mask, YOLOv9, - YOLOv10 + YOLOv10, + YOLOv11, + YOLOv11_OBB, + YOLOv11Mask }; YOLOModels usedModel = YOLOModels::YOLOv9; switch (usedModel) diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 01d1102f3..3da659679 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -142,6 +142,9 @@ bool OCVDNNDetector::Init(const config_t& config) dictNetType["YOLOV8Mask"] = ModelType::YOLOV8Mask; dictNetType["YOLOV9"] = ModelType::YOLOV9; dictNetType["YOLOV10"] = ModelType::YOLOV10; + dictNetType["YOLOV11"] = ModelType::YOLOV11; + dictNetType["YOLOV11_OBB"] = ModelType::YOLOV11_OBB; + dictNetType["YOLOV11Mask"] = ModelType::YOLOV11Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -348,7 +351,7 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr } else { - if (m_netType == ModelType::YOLOV8 || m_netType == ModelType::YOLOV5 || m_netType == ModelType::YOLOV9 || m_netType == ModelType::YOLOV10) + if (m_netType == ModelType::YOLOV8 || m_netType == ModelType::YOLOV5 || m_netType == ModelType::YOLOV9 || m_netType == ModelType::YOLOV10 || m_netType == ModelType::YOLOV11) { int rows = detections[0].size[1]; int dimensions = detections[0].size[2]; @@ -370,7 +373,7 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr for (int i = 0; i < rows; ++i) { - if (m_netType == ModelType::YOLOV8 || m_netType == ModelType::YOLOV9) + if (m_netType == ModelType::YOLOV8 || m_netType == ModelType::YOLOV9 || m_netType == ModelType::YOLOV11) { float* classes_scores = data + 4; diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index 79842ba29..44d91b4de 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -42,7 +42,10 @@ class OCVDNNDetector final : public BaseDetector YOLOV8_OBB, YOLOV8Mask, YOLOV9, - YOLOV10 + YOLOV10, + YOLOV11, + YOLOV11_OBB, + YOLOV11Mask }; cv::dnn::Net m_net; diff --git a/src/Detector/YoloTensorRTDetector.cpp b/src/Detector/YoloTensorRTDetector.cpp index a0ebeb443..d1cfb352f 100644 --- a/src/Detector/YoloTensorRTDetector.cpp +++ b/src/Detector/YoloTensorRTDetector.cpp @@ -107,6 +107,9 @@ bool YoloTensorRTDetector::Init(const config_t& config) dictNetType["YOLOV8Mask"] = tensor_rt::YOLOV8Mask; dictNetType["YOLOV9"] = tensor_rt::YOLOV9; dictNetType["YOLOV10"] = tensor_rt::YOLOV10; + dictNetType["YOLOV11"] = tensor_rt::YOLOV11; + dictNetType["YOLOV11_OBB"] = tensor_rt::YOLOV11_OBB; + dictNetType["YOLOV11Mask"] = tensor_rt::YOLOV11Mask; auto netType = dictNetType.find(net_type->second); if (netType != dictNetType.end()) @@ -298,7 +301,7 @@ void YoloTensorRTDetector::Detect(const std::vector& frames, std::vect /// void YoloTensorRTDetector::CalcMotionMap(cv::Mat& frame) { - if (m_localConfig.net_type == tensor_rt::YOLOV7Mask || m_localConfig.net_type == tensor_rt::YOLOV8Mask) + if (m_localConfig.net_type == tensor_rt::YOLOV7Mask || m_localConfig.net_type == tensor_rt::YOLOV8Mask || m_localConfig.net_type == tensor_rt::YOLOV11Mask) { static std::vector color; if (color.empty()) diff --git a/src/Detector/tensorrt_yolo/CMakeLists.txt b/src/Detector/tensorrt_yolo/CMakeLists.txt index 30f916bfd..d09a22431 100644 --- a/src/Detector/tensorrt_yolo/CMakeLists.txt +++ b/src/Detector/tensorrt_yolo/CMakeLists.txt @@ -43,7 +43,7 @@ SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) find_package(CUDNN REQUIRED) find_package(TensorRT REQUIRED) -message("TensorRT major version: " ${TensorRT_VERSION_MAJOR}) +message("TensorRT version: " ${TensorRT_VERSION}) include_directories(${OpenCV_INCLUDE_DIRS}) include_directories(${CUDA_INCLUDE_DIRS}) @@ -64,11 +64,8 @@ cuda_add_library(${libname_rt} SHARED #message("${OpenCV_LIBS}") #message(${OpenCV_DIR}) -if (MSVC) - file(GLOB TensorRT_LIBRARIES ${TensorRT_LIBRARY}) -endif() +set(TensorRT_LIBRARIES ${TensorRT_LIBRARIES} ${TensorRT_nvinfer_LIBRARY} ${TensorRT_nvinfer_plugin_LIBRARY} ${TensorRT_nvonnxparser_LIBRARY}) -message("TensorRT_LIBRARY: ${TensorRT_LIBRARY}") message("TensorRT_LIBRARIES: ${TensorRT_LIBRARIES}") @@ -84,9 +81,11 @@ set(TENSORRT_LIBS ${TensorRT_LIBRARIES}) if (CMAKE_COMPILER_IS_GNUCXX) - set(TENSORRT_LIBS ${TENSORRT_LIBS} stdc++fs nvinfer_plugin nvonnxparser) + set(TENSORRT_LIBS ${TENSORRT_LIBS} stdc++fs) endif(CMAKE_COMPILER_IS_GNUCXX) +message("TENSORRT_LIBS: ${TENSORRT_LIBS}") + target_link_libraries(${libname_rt} ${TENSORRT_LIBS}) install(TARGETS ${libname_rt} @@ -96,4 +95,4 @@ install(TARGETS ${libname_rt} RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) -set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") \ No newline at end of file +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/src/Detector/tensorrt_yolo/YoloONNX.cpp b/src/Detector/tensorrt_yolo/YoloONNX.cpp index 0b19d5ccc..3ea99ec46 100644 --- a/src/Detector/tensorrt_yolo/YoloONNX.cpp +++ b/src/Detector/tensorrt_yolo/YoloONNX.cpp @@ -1,5 +1,7 @@ #include +#define DEFINE_TRT_ENTRYPOINTS 1 + #include "YoloONNX.hpp" #include "trt_utils.h" #include "../../common/defines.h" @@ -164,9 +166,9 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, size_t dlaManagedSRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM); size_t dlaLocalDRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM); size_t dlaGlobalDRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM); - std::cout << "workspaceSize = " << workspaceSize << ", dlaManagedSRAMSize = " << dlaManagedSRAMSize << ", dlaLocalDRAMSize = " << dlaLocalDRAMSize << ", dlaGlobalDRAMSize = " << dlaGlobalDRAMSize << std::endl; + std::cout << "m_params.videoMemory = " << m_params.videoMemory << ", workspaceSize = " << workspaceSize << ", dlaManagedSRAMSize = " << dlaManagedSRAMSize << ", dlaLocalDRAMSize = " << dlaLocalDRAMSize << ", dlaGlobalDRAMSize = " << dlaGlobalDRAMSize << std::endl; - config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, m_params.videoMemory ? m_params.videoMemory : (1 << 20)); + config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, m_params.videoMemory ? m_params.videoMemory : workspaceSize); #endif config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK); diff --git a/src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp new file mode 100644 index 000000000..9103bfa67 --- /dev/null +++ b/src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp @@ -0,0 +1,111 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv11_bb_onnx class +/// +class YOLOv11_bb_onnx : public YoloONNX +{ +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x84x8400 + + const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); + const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = m_outpuDims[0].d[ncInd] - 4; + int dimensions = nc + 4; + size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (objectConf >= m_params.confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2); + float y = fh * (output[k + 1] - output[k + 3] / 2); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp new file mode 100644 index 000000000..ea6ea2a29 --- /dev/null +++ b/src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp @@ -0,0 +1,301 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv11_instance_onnx class +/// +class YOLOv11_instance_onnx : public YoloONNX +{ +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); + const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); + + size_t outInd = (outputs.size() == 0) ? 0 : 1; + size_t segInd = (outputs.size() == 0) ? 1 : 0; + + auto output = outputs[0]; + + //std::cout << "output[1] mem:\n"; + //auto output1 = outputs[1]; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output1[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + //0: name: images, size: 1x3x640x640 + //1: name: output1, size: 1x32x160x160 + //2: name: output0, size: 1x116x8400 + // 25200 = 3x80x80 + 3x40x40 + 3x20x20 + // 116 = x, y, w, h, 80 classes, 32 seg ancors + // 80 * 8 = 640, 40 * 16 = 640, 20 * 32 = 640 + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = m_outpuDims[outInd].d[ncInd] - 4 - 32; + int dimensions = nc + 32 + 4; + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]) / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[outInd].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + +#if 1 + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + //std::cout << "output1 nbDims: " << m_outpuDims[segInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[segInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[segInd].d[i]; + // if (i + 1 != m_outpuDims[segInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + //std::cout << "output nbDims: " << m_outpuDims[outInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[outInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[outInd].d[i]; + // if (i + 1 != m_outpuDims[outInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + + segChannels = m_outpuDims[segInd].d[1]; + segWidth = m_outpuDims[segInd].d[2]; + segHeight = m_outpuDims[segInd].d[3]; + } + cv::Mat maskProposals; + std::vector> picked_proposals; + int net_width = nc + 4 + segChannels; +#endif + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4 + 32); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // std::cout << "without nms: mem" << i << ": "; + // for (size_t ii = 0; ii < 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = 4; ii < nc + 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = nc + 4; ii < nc + 4 + 32; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + //} + + if (objectConf >= m_params.confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2); + float y = fh * (output[k + 1] - output[k + 3] / 2); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + + //auto ClampToFrame = [](float& v, float& size, int hi) -> int + //{ + // int res = 0; +// + // if (size < 1) + // size = 0; +// + // if (v < 0) + // { + // res = v; + // v = 0; + // return res; + // } + // else if (v + size > hi - 1) + // { + // res = v; + // v = hi - 1 - size; + // if (v < 0) + // { + // size += v; + // v = 0; + // } + // res -= v; + // return res; + // } + // return res; + //}; + //ClampToFrame(x, width, frameSize.width); + //ClampToFrame(y, height, frameSize.height); + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (width > 4 && height > 4) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + + std::vector temp_proto(output + k + 4 + nc, output + k + net_width); + picked_proposals.push_back(temp_proto); + } + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], Clamp(rectBoxes[indices[bi]], frameSize)); + maskProposals.push_back(cv::Mat(picked_proposals[indices[bi]]).t()); + } + + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[1]; + std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); + + int INPUT_W = m_inputDims.d[3]; + int INPUT_H = m_inputDims.d[2]; + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + int padh = 0; + cv::Rect roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, frameSize, cv::INTER_NEAREST); + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + + std::vector> contours; + std::vector hierarchy; +#if (CV_VERSION_MAJOR < 4) + cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + cv::RotatedRect rr = (contour.size() < 5) ? cv::minAreaRect(contour) : cv::fitEllipse(contour); + + br.x += resBoxes[i].m_brect.x; + br.y += resBoxes[i].m_brect.y; + rr.center.x += resBoxes[i].m_brect.x; + rr.center.y += resBoxes[i].m_brect.y; + + //std::cout << "rr: " << rr.center << ", " << rr.angle << ", " << rr.size << std::endl; + + if (resBoxes[i].m_boxMask.size() != br.size()) + { + br.width = resBoxes[i].m_boxMask.cols; + br.height = resBoxes[i].m_boxMask.rows; + if (br.x + br.width >= frameSize.width) + br.x = frameSize.width - br.width; + if (br.y + br.height >= frameSize.height) + br.y = frameSize.height - br.height; + } + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + break; + } + } + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp b/src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp new file mode 100644 index 000000000..7c2b98ce2 --- /dev/null +++ b/src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp @@ -0,0 +1,124 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv11_obb_onnx class +/// +class YOLOv11_obb_onnx : public YoloONNX +{ +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x20x21504 + //20: 15 DOTA classes + x + y + w + h + a + constexpr int shapeDataSize = 5; + + const float fw = static_cast(frameSize.width) / static_cast(m_inputDims.d[3]); + const float fh = static_cast(frameSize.height) / static_cast(m_inputDims.d[2]); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = m_outpuDims[0].d[ncInd] - shapeDataSize; + int dimensions = nc + shapeDataSize; + size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + shapeDataSize); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // for (int jj = 0; jj < 20; ++jj) + // { + // std::cout << output[jj] << " "; + // } + // std::cout << std::endl; + //} + + if (objectConf >= m_params.confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) + float cx = fw * output[k]; + float cy = fh * output[k + 1]; + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + float angle = 180.f * output[k + nc + shapeDataSize - 1] / M_PI; + rectBoxes.emplace_back(cv::Point2f(cx, cy), cv::Size2f(width, height), angle); + + //if (rectBoxes.size() == 1) + // std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << rectBoxes.back().boundingRect() << ", angle = " << angle << std::endl; + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + //std::vector indices; + //cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + //resBoxes.reserve(indices.size()); + + resBoxes.reserve(rectBoxes.size()); + for (size_t bi = 0; bi < rectBoxes.size(); ++bi) + { + resBoxes.emplace_back(classIds[bi], confidences[bi], rectBoxes[bi]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_yolo/class_detector.cpp b/src/Detector/tensorrt_yolo/class_detector.cpp index f7a18e236..895e5d96f 100644 --- a/src/Detector/tensorrt_yolo/class_detector.cpp +++ b/src/Detector/tensorrt_yolo/class_detector.cpp @@ -10,6 +10,10 @@ #include "YoloONNXv8_instance.hpp" #include "YoloONNXv9_bb.hpp" #include "YoloONNXv10_bb.hpp" +#include "YoloONNXv11_bb.hpp" +#include "YoloONNXv11_obb.hpp" +#include "YoloONNXv11_instance.hpp" + namespace tensor_rt { @@ -110,6 +114,22 @@ namespace tensor_rt m_params.outputTensorNames.push_back("output0"); m_detector = std::make_unique(); break; + case ModelType::YOLOV11: + m_params.inputTensorNames.push_back("images"); + m_params.outputTensorNames.push_back("output0"); + m_detector = std::make_unique(); + break; + case ModelType::YOLOV11_OBB: + m_params.inputTensorNames.push_back("images"); + m_params.outputTensorNames.push_back("output0"); + m_detector = std::make_unique(); + break; + case ModelType::YOLOV11Mask: + m_params.inputTensorNames.push_back("images"); + m_params.outputTensorNames.push_back("output0"); + m_params.outputTensorNames.push_back("output1"); + m_detector = std::make_unique(); + break; } // Threshold values @@ -193,7 +213,8 @@ namespace tensor_rt if (config.net_type == ModelType::YOLOV6 || config.net_type == ModelType::YOLOV7 || config.net_type == ModelType::YOLOV7Mask || config.net_type == ModelType::YOLOV8 || config.net_type == ModelType::YOLOV8_OBB || config.net_type == ModelType::YOLOV8Mask || - config.net_type == ModelType::YOLOV9 || config.net_type == ModelType::YOLOV10) + config.net_type == ModelType::YOLOV9 || config.net_type == ModelType::YOLOV10 || + config.net_type == ModelType::YOLOV11 || config.net_type == ModelType::YOLOV11_OBB || config.net_type == ModelType::YOLOV11Mask) m_impl = new YoloONNXImpl(); else m_impl = new YoloDectectorImpl(); diff --git a/src/Detector/tensorrt_yolo/class_detector.h b/src/Detector/tensorrt_yolo/class_detector.h index 1dd85d709..b4da0d0a0 100644 --- a/src/Detector/tensorrt_yolo/class_detector.h +++ b/src/Detector/tensorrt_yolo/class_detector.h @@ -54,7 +54,10 @@ namespace tensor_rt YOLOV8_OBB, YOLOV8Mask, YOLOV9, - YOLOV10 + YOLOV10, + YOLOV11, + YOLOV11_OBB, + YOLOV11Mask }; /// diff --git a/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake b/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake index 7ec8d9980..b00993057 100644 --- a/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake +++ b/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake @@ -1,72 +1,115 @@ +# ~~~ +# Copyright 2021 Olivier Le Doeuff +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # This module defines the following variables: # -# :: +# - TensorRT_FOUND: A boolean specifying whether or not TensorRT was found. +# - TensorRT_VERSION: The exact version of TensorRT found +# - TensorRT_VERSION_MAJOR: The major version of TensorRT. +# - TensorRT_VERSION_MINOR: The minor version of TensorRT. +# - TensorRT_VERSION_PATCH: The patch version of TensorRT. +# - TensorRT_VERSION_TWEAK: The tweak version of TensorRT. +# - TensorRT_INCLUDE_DIRS: The path to TensorRT ``include`` folder containing the header files required to compile a project linking against TensorRT. +# - TensorRT_LIBRARY_DIRS: The path to TensorRT library directory that contains libraries. # -# TensorRT_INCLUDE_DIRS -# TensorRT_LIBRARIES -# TensorRT_FOUND -# -# :: -# -# TensorRT_VERSION_STRING - version (x.y.z) -# TensorRT_VERSION_MAJOR - major version (x) -# TensorRT_VERSION_MINOR - minor version (y) -# TensorRT_VERSION_PATCH - patch version (z) +# This module create following targets: +# - trt::nvinfer +# - trt::nvinfer_plugin +# - trt::nvonnxparser +# - trt::nvparsers +# This script was inspired from https://github.com/NicolasIRAGNE/CMakeScripts +# This script was inspired from https://github.com/NVIDIA/tensorrt-laboratory/blob/master/cmake/FindTensorRT.cmake # # Hints # ^^^^^ # A user may set ``TensorRT_ROOT`` to an installation root to tell this module where to look. -# -set(_TensorRT_SEARCHES) +# ~~~ -if(TensorRT_ROOT) - set(_TensorRT_SEARCH_ROOT PATHS ${TensorRT_ROOT} NO_DEFAULT_PATH) - list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_ROOT) +if(NOT TensorRT_FIND_COMPONENTS) + set(TensorRT_FIND_COMPONENTS nvinfer nvinfer_plugin nvonnxparser) endif() +set(TensorRT_LIBRARIES) -# appends some common paths -set(_TensorRT_SEARCH_NORMAL - PATHS "/usr" +# find the include directory of TensorRT +find_path( + TensorRT_INCLUDE_DIR + NAMES NvInfer.h + PATHS ${TensorRT_ROOT} ENV TensorRT_ROOT + PATH_SUFFIXES include ) -list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_NORMAL) - -# Include dir -foreach(search ${_TensorRT_SEARCHES}) - find_path(TensorRT_INCLUDE_DIR NAMES NvInfer.h ${${search}} PATH_SUFFIXES include) -endforeach() -if(NOT TensorRT_LIBRARY) - foreach(search ${_TensorRT_SEARCHES}) - find_library(TensorRT_LIBRARY NAMES nvinfer ${${search}} PATH_SUFFIXES lib) - endforeach() +string(FIND ${TensorRT_INCLUDE_DIR} "NOTFOUND" _include_dir_notfound) +if(NOT _include_dir_notfound EQUAL -1) + if(TensorRT_FIND_REQUIRED) + message(FATAL_ERROR "Fail to find TensorRT, please set TensorRT_ROOT. Include path not found.") + endif() + return() endif() +set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR}) -mark_as_advanced(TensorRT_INCLUDE_DIR) - -if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") +# Extract version of tensorrt +if(EXISTS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_TWEAK REGEX "^#define NV_TENSORRT_BUILD [0-9]+.*$") - string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") - string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") - string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") - set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") + string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") + string(REGEX REPLACE "^#define NV_TENSORRT_BUILD ([0-9]+).*$" "\\1" TensorRT_VERSION_TWEAK "${TensorRT_TWEAK}") + set(TensorRT_VERSION "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}.${TensorRT_VERSION_TWEAK}") endif() -include(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(TensorRT REQUIRED_VARS TensorRT_LIBRARY TensorRT_INCLUDE_DIR VERSION_VAR TensorRT_VERSION_STRING) +function(_find_trt_component component) + + # Find library for component (ie nvinfer, nvparsers, etc...) + find_library( + TensorRT_${component}_LIBRARY + NAMES ${component} + PATHS ${TensorRT_ROOT} ${TENSORRT_LIBRARY_DIR} ENV TensorRT_ROOT + ) -if(TensorRT_FOUND) - set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR}) + string(FIND ${TensorRT_${component}_LIBRARY} "NOTFOUND" _library_not_found) - if(NOT TensorRT_LIBRARIES) - set(TensorRT_LIBRARIES ${TensorRT_LIBRARY}) + if(NOT TensorRT_LIBRARY_DIR) + get_filename_component(_path ${TensorRT_${component}_LIBRARY} DIRECTORY) + set(TensorRT_LIBRARY_DIR + "${_path}" + CACHE INTERNAL "TensorRT_LIBRARY_DIR" + ) endif() - if(NOT TARGET TensorRT::TensorRT) - add_library(TensorRT::TensorRT UNKNOWN IMPORTED) - set_target_properties(TensorRT::TensorRT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}") - set_property(TARGET TensorRT::TensorRT APPEND PROPERTY IMPORTED_LOCATION "${TensorRT_LIBRARY}") + if(NOT TensorRT_LIBRARY_DIRS) + get_filename_component(_path ${TensorRT_${component}_LIBRARY} DIRECTORY) + set(TensorRT_LIBRARY_DIRS + "${_path}" + CACHE INTERNAL "TensorRT_LIBRARY_DIRS" + ) endif() -endif() + + # Library found, and doesn't already exists + if(_library_not_found EQUAL -1 AND NOT TARGET trt::${component}) + set(TensorRT_${component}_FOUND + TRUE + CACHE INTERNAL "Found ${component}" + ) + + # Create a target + add_library(trt::${component} IMPORTED INTERFACE) + target_include_directories(trt::${component} SYSTEM INTERFACE "${TensorRT_INCLUDE_DIRS}") + target_link_libraries(trt::${component} INTERFACE "${TensorRT_${component}_LIBRARY}") + set(TensorRT_LIBRARIES ${TensorRT_LIBRARIES} ${TensorRT_${component}_LIBRARY}) + endif() + +endfunction() + +# Find each components +foreach(component IN LISTS TensorRT_FIND_COMPONENTS) + _find_trt_component(${component}) +endforeach() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(TensorRT HANDLE_COMPONENTS VERSION_VAR TensorRT_VERSION REQUIRED_VARS TensorRT_INCLUDE_DIR) diff --git a/src/Detector/tensorrt_yolo/common/sampleEngines.cpp_ b/src/Detector/tensorrt_yolo/common/sampleEngines.cpp similarity index 99% rename from src/Detector/tensorrt_yolo/common/sampleEngines.cpp_ rename to src/Detector/tensorrt_yolo/common/sampleEngines.cpp index 8ada0526d..dacf6f2a7 100644 --- a/src/Detector/tensorrt_yolo/common/sampleEngines.cpp_ +++ b/src/Detector/tensorrt_yolo/common/sampleEngines.cpp @@ -559,7 +559,7 @@ void setLayerDeviceTypes( if (match != layerDeviceTypes.end()) { DeviceType const deviceType = match->second; - sample::gLogInfo << "Set layer " << layerName << " to device type " << deviceType << std::endl; + sample::gLogInfo << "Set layer " << layerName << " to device type " << (int)deviceType << std::endl; config.setDeviceType(layer, deviceType); } } @@ -845,7 +845,11 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, if (build.maxTactics != defaultMaxTactics) { +#if (NV_TENSORRT_MAJOR < 9) config.setMaxNbTactics(build.maxTactics); +#else + config.setTacticSources(build.maxTactics); +#endif } if (build.timingCacheMode == TimingCacheMode::kDISABLE) diff --git a/src/Detector/tensorrt_yolo/common/sampleInference.cpp_ b/src/Detector/tensorrt_yolo/common/sampleInference.cpp similarity index 100% rename from src/Detector/tensorrt_yolo/common/sampleInference.cpp_ rename to src/Detector/tensorrt_yolo/common/sampleInference.cpp diff --git a/src/Detector/tensorrt_yolo/common_deprecated/BatchStream.h b/src/Detector/tensorrt_yolo/common_deprecated/BatchStream.h deleted file mode 100644 index 9eaac768b..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/BatchStream.h +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef BATCH_STREAM_H -#define BATCH_STREAM_H - -#include "NvInfer.h" -#include "common.h" -#include -#include -#include - -class IBatchStream -{ -public: - virtual void reset(int firstBatch) = 0; - virtual bool next() = 0; - virtual void skip(int skipCount) = 0; - virtual float* getBatch() = 0; - virtual float* getLabels() = 0; - virtual int getBatchesRead() const = 0; - virtual int getBatchSize() const = 0; - virtual nvinfer1::Dims getDims() const = 0; -}; - -class MNISTBatchStream : public IBatchStream -{ -public: - MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile, const std::string& labelsFile, - const std::vector& directories) - : mBatchSize{batchSize} - , mMaxBatches{maxBatches} - , mDims{3, {1, 28, 28}} //!< We already know the dimensions of MNIST images. - { - readDataFile(locateFile(dataFile, directories)); - readLabelsFile(locateFile(labelsFile, directories)); - } - - void reset(int firstBatch) override - { - mBatchCount = firstBatch; - } - - bool next() override - { - if (mBatchCount >= mMaxBatches) - { - return false; - } - ++mBatchCount; - return true; - } - - void skip(int skipCount) override - { - mBatchCount += skipCount; - } - - float* getBatch() override - { - return mData.data() + (mBatchCount * mBatchSize * samplesCommon::volume(mDims)); - } - - float* getLabels() override - { - return mLabels.data() + (mBatchCount * mBatchSize); - } - - int getBatchesRead() const override - { - return mBatchCount; - } - - int getBatchSize() const override - { - return mBatchSize; - } - - nvinfer1::Dims getDims() const override - { - return nvinfer1::Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}}; - } - -private: - void readDataFile(const std::string& dataFilePath) - { - std::ifstream file{dataFilePath.c_str(), std::ios::binary}; - - int magicNumber, numImages, imageH, imageW; - file.read(reinterpret_cast(&magicNumber), sizeof(magicNumber)); - // All values in the MNIST files are big endian. - magicNumber = samplesCommon::swapEndianness(magicNumber); - ASSERT(magicNumber == 2051 && "Magic Number does not match the expected value for an MNIST image set"); - - // Read number of images and dimensions - file.read(reinterpret_cast(&numImages), sizeof(numImages)); - file.read(reinterpret_cast(&imageH), sizeof(imageH)); - file.read(reinterpret_cast(&imageW), sizeof(imageW)); - - numImages = samplesCommon::swapEndianness(numImages); - imageH = samplesCommon::swapEndianness(imageH); - imageW = samplesCommon::swapEndianness(imageW); - - // The MNIST data is made up of unsigned bytes, so we need to cast to float and normalize. - int numElements = numImages * imageH * imageW; - std::vector rawData(numElements); - file.read(reinterpret_cast(rawData.data()), numElements * sizeof(uint8_t)); - mData.resize(numElements); - std::transform( - rawData.begin(), rawData.end(), mData.begin(), [](uint8_t val) { return static_cast(val) / 255.f; }); - } - - void readLabelsFile(const std::string& labelsFilePath) - { - std::ifstream file{labelsFilePath.c_str(), std::ios::binary}; - int magicNumber, numImages; - file.read(reinterpret_cast(&magicNumber), sizeof(magicNumber)); - // All values in the MNIST files are big endian. - magicNumber = samplesCommon::swapEndianness(magicNumber); - ASSERT(magicNumber == 2049 && "Magic Number does not match the expected value for an MNIST labels file"); - - file.read(reinterpret_cast(&numImages), sizeof(numImages)); - numImages = samplesCommon::swapEndianness(numImages); - - std::vector rawLabels(numImages); - file.read(reinterpret_cast(rawLabels.data()), numImages * sizeof(uint8_t)); - mLabels.resize(numImages); - std::transform( - rawLabels.begin(), rawLabels.end(), mLabels.begin(), [](uint8_t val) { return static_cast(val); }); - } - - int mBatchSize{0}; - int mBatchCount{0}; //!< The batch that will be read on the next invocation of next() - int mMaxBatches{0}; - nvinfer1::Dims mDims{}; - std::vector mData{}; - std::vector mLabels{}; -}; - -class BatchStream : public IBatchStream -{ -public: - BatchStream( - int batchSize, int maxBatches, std::string prefix, std::string suffix, std::vector directories) - : mBatchSize(batchSize) - , mMaxBatches(maxBatches) - , mPrefix(prefix) - , mSuffix(suffix) - , mDataDir(directories) - { - FILE* file = fopen(locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(), "rb"); - ASSERT(file != nullptr); - int d[4]; - size_t readSize = fread(d, sizeof(int), 4, file); - ASSERT(readSize == 4); - mDims.nbDims = 4; // The number of dimensions. - mDims.d[0] = d[0]; // Batch Size - mDims.d[1] = d[1]; // Channels - mDims.d[2] = d[2]; // Height - mDims.d[3] = d[3]; // Width - ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && mDims.d[3] > 0); - fclose(file); - - mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; - mBatch.resize(mBatchSize * mImageSize, 0); - mLabels.resize(mBatchSize, 0); - mFileBatch.resize(mDims.d[0] * mImageSize, 0); - mFileLabels.resize(mDims.d[0], 0); - reset(0); - } - - BatchStream(int batchSize, int maxBatches, std::string prefix, std::vector directories) - : BatchStream(batchSize, maxBatches, prefix, ".batch", directories) - { - } - - BatchStream( - int batchSize, int maxBatches, nvinfer1::Dims dims, std::string listFile, std::vector directories) - : mBatchSize(batchSize) - , mMaxBatches(maxBatches) - , mDims(dims) - , mListFile(listFile) - , mDataDir(directories) - { - mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; - mBatch.resize(mBatchSize * mImageSize, 0); - mLabels.resize(mBatchSize, 0); - mFileBatch.resize(mDims.d[0] * mImageSize, 0); - mFileLabels.resize(mDims.d[0], 0); - reset(0); - } - - // Resets data members - void reset(int firstBatch) override - { - mBatchCount = 0; - mFileCount = 0; - mFileBatchPos = mDims.d[0]; - skip(firstBatch); - } - - // Advance to next batch and return true, or return false if there is no batch left. - bool next() override - { - if (mBatchCount == mMaxBatches) - { - return false; - } - - for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) - { - ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]); - if (mFileBatchPos == mDims.d[0] && !update()) - { - return false; - } - - // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer. - csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos); - std::copy_n( - getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); - std::copy_n(getFileLabels() + mFileBatchPos, csize, getLabels() + batchPos); - } - mBatchCount++; - return true; - } - - // Skips the batches - void skip(int skipCount) override - { - if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0]) - { - mFileCount += skipCount * mBatchSize / mDims.d[0]; - return; - } - - int x = mBatchCount; - for (int i = 0; i < skipCount; i++) - { - next(); - } - mBatchCount = x; - } - - float* getBatch() override - { - return mBatch.data(); - } - - float* getLabels() override - { - return mLabels.data(); - } - - int getBatchesRead() const override - { - return mBatchCount; - } - - int getBatchSize() const override - { - return mBatchSize; - } - - nvinfer1::Dims getDims() const override - { - return mDims; - } - -private: - float* getFileBatch() - { - return mFileBatch.data(); - } - - float* getFileLabels() - { - return mFileLabels.data(); - } - - bool update() - { - if (mListFile.empty()) - { - std::string inputFileName = locateFile(mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir); - FILE* file = fopen(inputFileName.c_str(), "rb"); - if (!file) - { - return false; - } - - int d[4]; - size_t readSize = fread(d, sizeof(int), 4, file); - ASSERT(readSize == 4); - ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && mDims.d[3] == d[3]); - size_t readInputCount = fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file); - ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize)); - size_t readLabelCount = fread(getFileLabels(), sizeof(float), mDims.d[0], file); - ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0])); - - fclose(file); - } - else - { - std::vector fNames; - std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary); - if (!file) - { - return false; - } - - sample::gLogInfo << "Batch #" << mFileCount << std::endl; - file.seekg(((mBatchCount * mBatchSize)) * 7); - - for (int i = 1; i <= mBatchSize; i++) - { - std::string sName; - std::getline(file, sName); - sName = sName + ".ppm"; - sample::gLogInfo << "Calibrating with file " << sName << std::endl; - fNames.emplace_back(sName); - } - - mFileCount++; - - const int imageC = 3; - const int imageH = 300; - const int imageW = 300; - std::vector> ppms(fNames.size()); - for (uint32_t i = 0; i < fNames.size(); ++i) - { - readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]); - } - - std::vector data(samplesCommon::volume(mDims)); - const float scale = 2.0 / 255.0; - const float bias = 1.0; - long int volChl = mDims.d[2] * mDims.d[3]; - - // Normalize input data - for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3]; i < mBatchSize; ++i) - { - for (int c = 0; c < mDims.d[1]; ++c) - { - for (int j = 0; j < volChl; ++j) - { - data[i * volImg + c * volChl + j] = scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias; - } - } - } - - std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch()); - } - - mFileBatchPos = 0; - return true; - } - - int mBatchSize{0}; - int mMaxBatches{0}; - int mBatchCount{0}; - int mFileCount{0}; - int mFileBatchPos{0}; - int mImageSize{0}; - std::vector mBatch; //!< Data for the batch - std::vector mLabels; //!< Labels for the batch - std::vector mFileBatch; //!< List of image files - std::vector mFileLabels; //!< List of label files - std::string mPrefix; //!< Batch file name prefix - std::string mSuffix; //!< Batch file name suffix - nvinfer1::Dims mDims; //!< Input dimensions - std::string mListFile; //!< File name of the list of image names - std::vector mDataDir; //!< Directories where the files can be found -}; - -#endif diff --git a/src/Detector/tensorrt_yolo/common_deprecated/EntropyCalibrator.h b/src/Detector/tensorrt_yolo/common_deprecated/EntropyCalibrator.h deleted file mode 100644 index f31789bf2..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/EntropyCalibrator.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ENTROPY_CALIBRATOR_H -#define ENTROPY_CALIBRATOR_H - -#include "BatchStream.h" -#include "NvInfer.h" - -//! \class EntropyCalibratorImpl -//! -//! \brief Implements common functionality for Entropy calibrators. -//! -template -class EntropyCalibratorImpl -{ -public: - EntropyCalibratorImpl( - TBatchStream stream, int firstBatch, std::string networkName, const char* inputBlobName, bool readCache = true) - : mStream{stream} - , mCalibrationTableName("CalibrationTable" + networkName) - , mInputBlobName(inputBlobName) - , mReadCache(readCache) - { - nvinfer1::Dims dims = mStream.getDims(); - mInputCount = samplesCommon::volume(dims); - CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); - mStream.reset(firstBatch); - } - - virtual ~EntropyCalibratorImpl() - { - CHECK(cudaFree(mDeviceInput)); - } - - int getBatchSize() const noexcept - { - return mStream.getBatchSize(); - } - - bool getBatch(void* bindings[], const char* names[], int /*nbBindings*/) noexcept - { - if (!mStream.next()) - return false; - - CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); - ASSERT(!strcmp(names[0], mInputBlobName)); - bindings[0] = mDeviceInput; - return true; - } - - const void* readCalibrationCache(size_t& length) noexcept - { - mCalibrationCache.clear(); - std::ifstream input(mCalibrationTableName, std::ios::binary); - input >> std::noskipws; - if (mReadCache && input.good()) - { - std::copy(std::istream_iterator(input), std::istream_iterator(), - std::back_inserter(mCalibrationCache)); - } - length = mCalibrationCache.size(); - return length ? mCalibrationCache.data() : nullptr; - } - - void writeCalibrationCache(const void* cache, size_t length) noexcept - { - std::ofstream output(mCalibrationTableName, std::ios::binary); - output.write(reinterpret_cast(cache), length); - } - -private: - TBatchStream mStream; - size_t mInputCount; - std::string mCalibrationTableName; - const char* mInputBlobName; - bool mReadCache{true}; - void* mDeviceInput{nullptr}; - std::vector mCalibrationCache; -}; - -//! \class Int8EntropyCalibrator2 -//! -//! \brief Implements Entropy calibrator 2. -//! CalibrationAlgoType is kENTROPY_CALIBRATION_2. -//! -template -class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 -{ -public: - Int8EntropyCalibrator2( - TBatchStream stream, int firstBatch, const char* networkName, const char* inputBlobName, bool readCache = true) - : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) - { - } - - int getBatchSize() const noexcept override - { - return mImpl.getBatchSize(); - } - - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override - { - return mImpl.getBatch(bindings, names, nbBindings); - } - - const void* readCalibrationCache(size_t& length) noexcept override - { - return mImpl.readCalibrationCache(length); - } - - void writeCalibrationCache(const void* cache, size_t length) noexcept override - { - mImpl.writeCalibrationCache(cache, length); - } - -private: - EntropyCalibratorImpl mImpl; -}; - -#endif // ENTROPY_CALIBRATOR_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/ErrorRecorder.h b/src/Detector/tensorrt_yolo/common_deprecated/ErrorRecorder.h deleted file mode 100644 index 40b35fb5c..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/ErrorRecorder.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ERROR_RECORDER_H -#define ERROR_RECORDER_H -#include "NvInferRuntimeCommon.h" -#include "logger.h" -#include -#include -#include -#include -#include - -using nvinfer1::IErrorRecorder; -using nvinfer1::ErrorCode; - -//! -//! A simple implementation of the IErrorRecorder interface for -//! use by samples. This interface also can be used as a reference -//! implementation. -//! The sample Error recorder is based on a vector that pairs the error -//! code and the error string into a single element. It also uses -//! standard mutex's and atomics in order to make sure that the code -//! works in a multi-threaded environment. -//! -class SampleErrorRecorder : public IErrorRecorder -{ - using errorPair = std::pair; - using errorStack = std::vector; - -public: - SampleErrorRecorder() = default; - - virtual ~SampleErrorRecorder() noexcept {} - int32_t getNbErrors() const noexcept final - { - return mErrorStack.size(); - } - ErrorCode getErrorCode(int32_t errorIdx) const noexcept final - { - return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first; - }; - IErrorRecorder::ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final - { - return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str(); - } - // This class can never overflow since we have dynamic resize via std::vector usage. - bool hasOverflowed() const noexcept final - { - return false; - } - - // Empty the errorStack. - void clear() noexcept final - { - try - { - // grab a lock so that there is no addition while clearing. - std::lock_guard guard(mStackLock); - mErrorStack.clear(); - } - catch (const std::exception& e) - { - sample::gLogFatal << "Internal Error: " << e.what() << std::endl; - } - }; - - //! Simple helper function that - bool empty() const noexcept - { - return mErrorStack.empty(); - } - - bool reportError(ErrorCode val, IErrorRecorder::ErrorDesc desc) noexcept final - { - try - { - std::lock_guard guard(mStackLock); - sample::gLogError << "Error[" << static_cast(val) << "]: " << desc << std::endl; - mErrorStack.push_back(errorPair(val, desc)); - } - catch (const std::exception& e) - { - sample::gLogFatal << "Internal Error: " << e.what() << std::endl; - } - // All errors are considered fatal. - return true; - } - - // Atomically increment or decrement the ref counter. - IErrorRecorder::RefCount incRefCount() noexcept final - { - return ++mRefCount; - } - IErrorRecorder::RefCount decRefCount() noexcept final - { - return --mRefCount; - } - -private: - // Simple helper functions. - const errorPair& operator[](size_t index) const noexcept - { - return mErrorStack[index]; - } - - bool invalidIndexCheck(int32_t index) const noexcept - { - // By converting signed to unsigned, we only need a single check since - // negative numbers turn into large positive greater than the size. - size_t sIndex = index; - return sIndex >= mErrorStack.size(); - } - // Mutex to hold when locking mErrorStack. - std::mutex mStackLock; - - // Reference count of the class. Destruction of the class when mRefCount - // is not zero causes undefined behavior. - std::atomic mRefCount{0}; - - // The error stack that holds the errors recorded by TensorRT. - errorStack mErrorStack; -}; // class SampleErrorRecorder -#endif // ERROR_RECORDER_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/buffers.h b/src/Detector/tensorrt_yolo/common_deprecated/buffers.h deleted file mode 100644 index ef673b2b8..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/buffers.h +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef TENSORRT_BUFFERS_H -#define TENSORRT_BUFFERS_H - -#include "NvInfer.h" -#include "common.h" -#include "half.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace samplesCommon -{ - -//! -//! \brief The GenericBuffer class is a templated class for buffers. -//! -//! \details This templated RAII (Resource Acquisition Is Initialization) class handles the allocation, -//! deallocation, querying of buffers on both the device and the host. -//! It can handle data of arbitrary types because it stores byte buffers. -//! The template parameters AllocFunc and FreeFunc are used for the -//! allocation and deallocation of the buffer. -//! AllocFunc must be a functor that takes in (void** ptr, size_t size) -//! and returns bool. ptr is a pointer to where the allocated buffer address should be stored. -//! size is the amount of memory in bytes to allocate. -//! The boolean indicates whether or not the memory allocation was successful. -//! FreeFunc must be a functor that takes in (void* ptr) and returns void. -//! ptr is the allocated buffer address. It must work with nullptr input. -//! -template -class GenericBuffer -{ -public: - //! - //! \brief Construct an empty buffer. - //! - GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT) - : mSize(0) - , mCapacity(0) - , mType(type) - , mBuffer(nullptr) - { - } - - //! - //! \brief Construct a buffer with the specified allocation size in bytes. - //! - GenericBuffer(size_t size, nvinfer1::DataType type) - : mSize(size) - , mCapacity(size) - , mType(type) - { - if (!allocFn(&mBuffer, this->nbBytes())) - { - throw std::bad_alloc(); - } - } - - GenericBuffer(GenericBuffer&& buf) - : mSize(buf.mSize) - , mCapacity(buf.mCapacity) - , mType(buf.mType) - , mBuffer(buf.mBuffer) - { - buf.mSize = 0; - buf.mCapacity = 0; - buf.mType = nvinfer1::DataType::kFLOAT; - buf.mBuffer = nullptr; - } - - GenericBuffer& operator=(GenericBuffer&& buf) - { - if (this != &buf) - { - freeFn(mBuffer); - mSize = buf.mSize; - mCapacity = buf.mCapacity; - mType = buf.mType; - mBuffer = buf.mBuffer; - // Reset buf. - buf.mSize = 0; - buf.mCapacity = 0; - buf.mBuffer = nullptr; - } - return *this; - } - - //! - //! \brief Returns pointer to underlying array. - //! - void* data() - { - return mBuffer; - } - - //! - //! \brief Returns pointer to underlying array. - //! - const void* data() const - { - return mBuffer; - } - - //! - //! \brief Returns the size (in number of elements) of the buffer. - //! - size_t size() const - { - return mSize; - } - - //! - //! \brief Returns the size (in bytes) of the buffer. - //! - size_t nbBytes() const - { - return this->size() * samplesCommon::getElementSize(mType); - } - - //! - //! \brief Resizes the buffer. This is a no-op if the new size is smaller than or equal to the current capacity. - //! - void resize(size_t newSize) - { - mSize = newSize; - if (mCapacity < newSize) - { - freeFn(mBuffer); - if (!allocFn(&mBuffer, this->nbBytes())) - { - throw std::bad_alloc{}; - } - mCapacity = newSize; - } - } - - //! - //! \brief Overload of resize that accepts Dims - //! - void resize(const nvinfer1::Dims& dims) - { - return this->resize(samplesCommon::volume(dims)); - } - - ~GenericBuffer() - { - freeFn(mBuffer); - } - -private: - size_t mSize{0}, mCapacity{0}; - nvinfer1::DataType mType; - void* mBuffer; - AllocFunc allocFn; - FreeFunc freeFn; -}; - -class DeviceAllocator -{ -public: - bool operator()(void** ptr, size_t size) const - { - return cudaMalloc(ptr, size) == cudaSuccess; - } -}; - -class DeviceFree -{ -public: - void operator()(void* ptr) const - { - cudaFree(ptr); - } -}; - -class HostAllocator -{ -public: - bool operator()(void** ptr, size_t size) const - { - *ptr = malloc(size); - return *ptr != nullptr; - } -}; - -class HostFree -{ -public: - void operator()(void* ptr) const - { - free(ptr); - } -}; - -using DeviceBuffer = GenericBuffer; -using HostBuffer = GenericBuffer; - -//! -//! \brief The ManagedBuffer class groups together a pair of corresponding device and host buffers. -//! -class ManagedBuffer -{ -public: - DeviceBuffer deviceBuffer; - HostBuffer hostBuffer; -}; - -//! -//! \brief The BufferManager class handles host and device buffer allocation and deallocation. -//! -//! \details This RAII class handles host and device buffer allocation and deallocation, -//! memcpy between host and device buffers to aid with inference, -//! and debugging dumps to validate inference. The BufferManager class is meant to be -//! used to simplify buffer management and any interactions between buffers and the engine. -//! -class BufferManager -{ -public: - static const size_t kINVALID_SIZE_VALUE = ~size_t(0); - - //! - //! \brief Create a BufferManager for handling buffer interactions with engine. - //! - BufferManager(std::shared_ptr engine, const int batchSize, - const nvinfer1::IExecutionContext* context = nullptr) - : mEngine(engine) - , mBatchSize(batchSize) - { - // Full Dims implies no batch size. - auto impbs = engine->hasImplicitBatchDimension(); - std::cout << "hasImplicitBatchDimension: " << impbs << ", mBatchSize = " << mBatchSize << std::endl; - assert(engine->hasImplicitBatchDimension() || mBatchSize == 0); - // Create host and device buffers - for (int i = 0; i < mEngine->getNbBindings(); i++) - { - auto dims = context ? context->getBindingDimensions(i) : mEngine->getBindingDimensions(i); - size_t vol = context || !mBatchSize ? 1 : static_cast(mBatchSize); - nvinfer1::DataType type = mEngine->getBindingDataType(i); - int vecDim = mEngine->getBindingVectorizedDim(i); - if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector - { - int scalarsPerVec = mEngine->getBindingComponentsPerElement(i); - dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec); - vol *= scalarsPerVec; - } - vol *= samplesCommon::volume(dims); - std::unique_ptr manBuf{new ManagedBuffer()}; - manBuf->deviceBuffer = DeviceBuffer(vol, type); - manBuf->hostBuffer = HostBuffer(vol, type); - mDeviceBindings.emplace_back(manBuf->deviceBuffer.data()); - mManagedBuffers.emplace_back(std::move(manBuf)); - } - } - - //! - //! \brief Returns a vector of device buffers that you can use directly as - //! bindings for the execute and enqueue methods of IExecutionContext. - //! - std::vector& getDeviceBindings() - { - return mDeviceBindings; - } - - //! - //! \brief Returns a vector of device buffers. - //! - const std::vector& getDeviceBindings() const - { - return mDeviceBindings; - } - - //! - //! \brief Returns the device buffer corresponding to tensorName. - //! Returns nullptr if no such tensor can be found. - //! - void* getDeviceBuffer(const std::string& tensorName) const - { - return getBuffer(false, tensorName); - } - - //! - //! \brief Returns the host buffer corresponding to tensorName. - //! Returns nullptr if no such tensor can be found. - //! - void* getHostBuffer(const std::string& tensorName) const - { - return getBuffer(true, tensorName); - } - - //! - //! \brief Returns the host buffer corresponding to tensorName. - //! Returns nullptr if no such tensor can be found. - //! - void* getHostBuffer(int bindingIndex) const - { - return getBuffer(true, bindingIndex); - } - - //! - //! \brief Returns the size of the host and device buffers that correspond to tensorName. - //! Returns kINVALID_SIZE_VALUE if no such tensor can be found. - //! - size_t size(const std::string& tensorName) const - { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) - return kINVALID_SIZE_VALUE; - return mManagedBuffers[index]->hostBuffer.nbBytes(); - } - - //! - //! \brief Dump host buffer with specified tensorName to ostream. - //! Prints error message to std::ostream if no such tensor can be found. - //! - void dumpBuffer(std::ostream& os, const std::string& tensorName) - { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) - { - os << "Invalid tensor name" << std::endl; - return; - } - void* buf = mManagedBuffers[index]->hostBuffer.data(); - size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes(); - nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index); - size_t rowCount = static_cast(bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize); - int leadDim = mBatchSize; - int* trailDims = bufDims.d; - int nbDims = bufDims.nbDims; - - // Fix explicit Dimension networks - if (!leadDim && nbDims > 0) - { - leadDim = bufDims.d[0]; - ++trailDims; - --nbDims; - } - - os << "[" << leadDim; - for (int i = 0; i < nbDims; i++) - os << ", " << trailDims[i]; - os << "]" << std::endl; - switch (mEngine->getBindingDataType(index)) - { - case nvinfer1::DataType::kINT32: print(os, buf, bufSize, rowCount); break; - case nvinfer1::DataType::kFLOAT: print(os, buf, bufSize, rowCount); break; - case nvinfer1::DataType::kHALF: print(os, buf, bufSize, rowCount); break; - case nvinfer1::DataType::kINT8: assert(0 && "Int8 network-level input and output is not supported"); break; - case nvinfer1::DataType::kBOOL: assert(0 && "Bool network-level input and output are not supported"); break; - } - } - - //! - //! \brief Templated print function that dumps buffers of arbitrary type to std::ostream. - //! rowCount parameter controls how many elements are on each line. - //! A rowCount of 1 means that there is only 1 element on each line. - //! - template - void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) - { - assert(rowCount != 0); - assert(bufSize % sizeof(T) == 0); - T* typedBuf = static_cast(buf); - size_t numItems = bufSize / sizeof(T); - for (int i = 0; i < static_cast(numItems); i++) - { - // Handle rowCount == 1 case - if (rowCount == 1 && i != static_cast(numItems) - 1) - os << typedBuf[i] << std::endl; - else if (rowCount == 1) - os << typedBuf[i]; - // Handle rowCount > 1 case - else if (i % rowCount == 0) - os << typedBuf[i]; - else if (i % rowCount == rowCount - 1) - os << " " << typedBuf[i] << std::endl; - else - os << " " << typedBuf[i]; - } - } - - //! - //! \brief Copy the contents of input host buffers to input device buffers synchronously. - //! - void copyInputToDevice() - { - memcpyBuffers(true, false, false, 0); - } - - //! - //! \brief Copy the contents of output device buffers to output host buffers synchronously. - //! - void copyOutputToHost() - { - memcpyBuffers(false, true, false, 0); - } - - //! - //! \brief Copy the contents of input host buffers to input device buffers asynchronously. - //! - void copyInputToDeviceAsync(const cudaStream_t& stream) - { - memcpyBuffers(true, false, true, stream); - } - - //! - //! \brief Copy the contents of output device buffers to output host buffers asynchronously. - //! - void copyOutputToHostAsync(const cudaStream_t& stream) - { - memcpyBuffers(false, true, true, stream); - } - - ~BufferManager() = default; - -private: - void* getBuffer(const bool isHost, const std::string& tensorName) const - { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) - return nullptr; - return (isHost ? mManagedBuffers[index]->hostBuffer.data() : mManagedBuffers[index]->deviceBuffer.data()); - } - - void* getBuffer(const bool isHost, int bindingIndex) const - { - if (bindingIndex == -1) - return nullptr; - return (isHost ? mManagedBuffers[bindingIndex]->hostBuffer.data() : mManagedBuffers[bindingIndex]->deviceBuffer.data()); - } - - void memcpyBuffers(const bool copyInput, const bool deviceToHost, const bool async, const cudaStream_t& stream) - { - for (int i = 0; i < mEngine->getNbBindings(); i++) - { - void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data() : mManagedBuffers[i]->deviceBuffer.data(); - const void* srcPtr = deviceToHost ? mManagedBuffers[i]->deviceBuffer.data() : mManagedBuffers[i]->hostBuffer.data(); - const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes(); - const cudaMemcpyKind memcpyType = deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice; - if ((copyInput && mEngine->bindingIsInput(i)) || (!copyInput && !mEngine->bindingIsInput(i))) - { - if (async) - CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream)); - else - CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType)); - } - } - } - - std::shared_ptr mEngine; //!< The pointer to the engine - int mBatchSize = 0; //!< The batch size for legacy networks, 0 otherwise. - std::vector> mManagedBuffers; //!< The vector of pointers to managed buffers - std::vector mDeviceBindings; //!< The vector of device buffers needed for engine execution -}; - -} // namespace samplesCommon - -#endif // TENSORRT_BUFFERS_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/common.h b/src/Detector/tensorrt_yolo/common_deprecated/common.h deleted file mode 100644 index 2270a2cd0..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/common.h +++ /dev/null @@ -1,963 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TENSORRT_COMMON_H -#define TENSORRT_COMMON_H - -// For loadLibrary -#ifdef _MSC_VER -// Needed so that the max/min definitions in windows.h do not conflict with std::max/min. -#ifndef NOMINMAX -#define NOMINMAX -#endif -#include -#undef NOMINMAX -#else -#include -#endif - -#include "NvInfer.h" -#include "NvInferPlugin.h" -#include "logger.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "safeCommon.h" - -#ifdef _MSC_VER -#define FN_NAME __FUNCTION__ -#else -#define FN_NAME __func__ -#endif - -#if defined(__aarch64__) || defined(__QNX__) -#define ENABLE_DLA_API 1 -#endif - -#define CHECK_RETURN_W_MSG(status, val, errMsg) \ - do \ - { \ - if (!(status)) \ - { \ - sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " << FN_NAME << "(), line " << __LINE__ \ - << std::endl; \ - return val; \ - } \ - } while (0) - -#undef ASSERT -#define ASSERT(condition) \ - do \ - { \ - if (!(condition)) \ - { \ - sample::gLogError << "Assertion failure: " << #condition << std::endl; \ - abort(); \ - } \ - } while (0) - - -#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "") - -#define OBJ_GUARD(A) std::unique_ptr - -template -OBJ_GUARD(T) -makeObjGuard(T_* t) -{ - CHECK(!(std::is_base_of::value || std::is_same::value)); - auto deleter = [](T* t) { t->destroy(); }; - return std::unique_ptr{static_cast(t), deleter}; -} - -constexpr long double operator"" _GiB(long double val) -{ - return val * (1 << 30); -} -constexpr long double operator"" _MiB(long double val) -{ - return val * (1 << 20); -} -constexpr long double operator"" _KiB(long double val) -{ - return val * (1 << 10); -} - -// These is necessary if we want to be able to write 1_GiB instead of 1.0_GiB. -// Since the return type is signed, -1_GiB will work as expected. -constexpr long long int operator"" _GiB(unsigned long long val) -{ - return val * (1 << 30); -} -constexpr long long int operator"" _MiB(unsigned long long val) -{ - return val * (1 << 20); -} -constexpr long long int operator"" _KiB(unsigned long long val) -{ - return val * (1 << 10); -} - -struct SimpleProfiler : public nvinfer1::IProfiler -{ - struct Record - { - float time{0}; - int count{0}; - }; - - virtual void reportLayerTime(const char* layerName, float ms) noexcept - { - mProfile[layerName].count++; - mProfile[layerName].time += ms; - if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) == mLayerNames.end()) - { - mLayerNames.push_back(layerName); - } - } - - SimpleProfiler(const char* name, const std::vector& srcProfilers = std::vector()) - : mName(name) - { - for (const auto& srcProfiler : srcProfilers) - { - for (const auto& rec : srcProfiler.mProfile) - { - auto it = mProfile.find(rec.first); - if (it == mProfile.end()) - { - mProfile.insert(rec); - } - else - { - it->second.time += rec.second.time; - it->second.count += rec.second.count; - } - } - } - } - - friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value) - { - out << "========== " << value.mName << " profile ==========" << std::endl; - float totalTime = 0; - std::string layerNameStr = "TensorRT layer name"; - int maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); - for (const auto& elem : value.mProfile) - { - totalTime += elem.second.time; - maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); - } - - auto old_settings = out.flags(); - auto old_precision = out.precision(); - // Output header - { - out << std::setw(maxLayerNameLength) << layerNameStr << " "; - out << std::setw(12) << "Runtime, " - << "%" - << " "; - out << std::setw(12) << "Invocations" - << " "; - out << std::setw(12) << "Runtime, ms" << std::endl; - } - for (size_t i = 0; i < value.mLayerNames.size(); i++) - { - const std::string layerName = value.mLayerNames[i]; - auto elem = value.mProfile.at(layerName); - out << std::setw(maxLayerNameLength) << layerName << " "; - out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.time * 100.0F / totalTime) << "%" - << " "; - out << std::setw(12) << elem.count << " "; - out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time << std::endl; - } - out.flags(old_settings); - out.precision(old_precision); - out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl; - - return out; - } - -private: - std::string mName; - std::vector mLayerNames; - std::map mProfile; -}; - -//! Locate path to file, given its filename or filepath suffix and possible dirs it might lie in. -//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a file path. -inline std::string locateFile( - const std::string& filepathSuffix, const std::vector& directories, bool reportError = true) -{ - const int MAX_DEPTH{10}; - bool found{false}; - std::string filepath; - - for (auto& dir : directories) - { - if (!dir.empty() && dir.back() != '/') - { -#ifdef _MSC_VER - filepath = dir + "\\" + filepathSuffix; -#else - filepath = dir + "/" + filepathSuffix; -#endif - } - else - { - filepath = dir + filepathSuffix; - } - - for (int i = 0; i < MAX_DEPTH && !found; i++) - { - const std::ifstream checkFile(filepath); - found = checkFile.is_open(); - if (found) - { - break; - } - - filepath = "../" + filepath; // Try again in parent dir - } - - if (found) - { - break; - } - - filepath.clear(); - } - - // Could not find the file - if (filepath.empty()) - { - const std::string dirList = std::accumulate(directories.begin() + 1, directories.end(), directories.front(), - [](const std::string& a, const std::string& b) { return a + "\n\t" + b; }); - std::cout << "Could not find " << filepathSuffix << " in data directories:\n\t" << dirList << std::endl; - - if (reportError) - { - std::cout << "&&&& FAILED" << std::endl; - exit(EXIT_FAILURE); - } - } - - return filepath; -} - -inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, int inW) -{ - std::ifstream infile(fileName, std::ifstream::binary); - assert(infile.is_open() && "Attempting to read from a file that is not open."); - std::string magic, h, w, max; - infile >> magic >> h >> w >> max; - infile.seekg(1, infile.cur); - infile.read(reinterpret_cast(buffer), inH * inW); -} - -namespace samplesCommon -{ - -// Swaps endianness of an integral type. -template ::value, int>::type = 0> -inline T swapEndianness(const T& value) -{ - uint8_t bytes[sizeof(T)]; - for (int i = 0; i < static_cast(sizeof(T)); ++i) - { - bytes[sizeof(T) - 1 - i] = *(reinterpret_cast(&value) + i); - } - return *reinterpret_cast(bytes); -} - -class HostMemory -{ -public: - HostMemory() = delete; - virtual void* data() const noexcept - { - return mData; - } - virtual std::size_t size() const noexcept - { - return mSize; - } - virtual nvinfer1::DataType type() const noexcept - { - return mType; - } - virtual ~HostMemory() {} - -protected: - HostMemory(std::size_t size, nvinfer1::DataType type) - : mData{nullptr} - , mSize(size) - , mType(type) - { - } - void* mData; - std::size_t mSize; - nvinfer1::DataType mType; -}; - -template -class TypedHostMemory : public HostMemory -{ -public: - explicit TypedHostMemory(std::size_t size) - : HostMemory(size, dataType) - { - mData = new ElemType[size]; - }; - ~TypedHostMemory() noexcept - { - delete[](ElemType*) mData; - } - ElemType* raw() noexcept - { - return static_cast(data()); - } -}; - -using FloatMemory = TypedHostMemory; -using HalfMemory = TypedHostMemory; -using ByteMemory = TypedHostMemory; - -inline void* safeCudaMalloc(size_t memSize) -{ - void* deviceMem; - CHECK(cudaMalloc(&deviceMem, memSize)); - if (deviceMem == nullptr) - { - std::cerr << "Out of memory" << std::endl; - exit(1); - } - return deviceMem; -} - -inline bool isDebug() -{ - return (std::getenv("TENSORRT_DEBUG") ? true : false); -} - -struct InferDeleter -{ - template - void operator()(T* obj) const - { -#if (NV_TENSORRT_MAJOR < 8) - obj->destroy(); -#else - delete obj; -#endif - } -}; - -template -using SampleUniquePtr = std::unique_ptr; - -static auto StreamDeleter = [](cudaStream_t* pStream) - { - if (pStream) - { - cudaStreamDestroy(*pStream); - delete pStream; - } - }; - -inline std::unique_ptr makeCudaStream() -{ - std::unique_ptr pStream(new cudaStream_t, StreamDeleter); - if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) != cudaSuccess) - { - pStream.reset(nullptr); - } - - return pStream; -} - -//! Return vector of indices that puts magnitudes of sequence in descending order. -template -std::vector argMagnitudeSort(Iter begin, Iter end) -{ - std::vector indices(end - begin); - std::iota(indices.begin(), indices.end(), 0); - std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) { return std::abs(begin[j]) < std::abs(begin[i]); }); - return indices; -} - -inline bool readReferenceFile(const std::string& fileName, std::vector& refVector) -{ - std::ifstream infile(fileName); - if (!infile.is_open()) - { - std::cout << "ERROR: readReferenceFile: Attempting to read from a file that is not open." << std::endl; - return false; - } - std::string line; - while (std::getline(infile, line)) - { - if (line.empty()) - continue; - refVector.push_back(line); - } - infile.close(); - return true; -} - -template -std::vector classify( - const std::vector& refVector, const std::vector& output, const size_t topK) -{ - const auto inds = samplesCommon::argMagnitudeSort(output.cbegin(), output.cend()); - std::vector result; - result.reserve(topK); - for (size_t k = 0; k < topK; ++k) - { - result.push_back(refVector[inds[k]]); - } - return result; -} - -// Returns indices of highest K magnitudes in v. -template -std::vector topKMagnitudes(const std::vector& v, const size_t k) -{ - std::vector indices = samplesCommon::argMagnitudeSort(v.cbegin(), v.cend()); - indices.resize(k); - return indices; -} - -template -bool readASCIIFile(const std::string& fileName, const size_t size, std::vector& out) -{ - std::ifstream infile(fileName); - if (!infile.is_open()) - { - std::cout << "ERROR readASCIIFile: Attempting to read from a file that is not open." << std::endl; - return false; - } - out.clear(); - out.reserve(size); - out.assign(std::istream_iterator(infile), std::istream_iterator()); - infile.close(); - return true; -} - -template -bool writeASCIIFile(const std::string& fileName, const std::vector& in) -{ - std::ofstream outfile(fileName); - if (!outfile.is_open()) - { - std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is not open." << std::endl; - return false; - } - for (auto fn : in) - { - outfile << fn << "\n"; - } - outfile.close(); - return true; -} - -inline void print_version() -{ - std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "." << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH - << "." << NV_TENSORRT_BUILD << std::endl; -} - -inline std::string getFileType(const std::string& filepath) -{ - return filepath.substr(filepath.find_last_of(".") + 1); -} - -inline std::string toLower(const std::string& inp) -{ - std::string out = inp; - std::transform(out.begin(), out.end(), out.begin(), ::tolower); - return out; -} - -inline float getMaxValue(const float* buffer, int64_t size) -{ - assert(buffer != nullptr); - assert(size > 0); - return *std::max_element(buffer, buffer + size); -} - -// Ensures that every tensor used by a network has a dynamic range set. -// -// All tensors in a network must have a dynamic range specified if a calibrator is not used. -// This function is just a utility to globally fill in missing scales and zero-points for the entire network. -// -// If a tensor does not have a dyanamic range set, it is assigned inRange or outRange as follows: -// -// * If the tensor is the input to a layer or output of a pooling node, its dynamic range is derived from inRange. -// * Otherwise its dynamic range is derived from outRange. -// -// The default parameter values are intended to demonstrate, for final layers in the network, -// cases where dynamic ranges are asymmetric. -// -// The default parameter values choosen arbitrarily. Range values should be choosen such that -// we avoid underflow or overflow. Also range value should be non zero to avoid uniform zero scale tensor. -inline void setAllDynamicRanges(nvinfer1::INetworkDefinition* network, float inRange = 2.0f, float outRange = 4.0f) -{ - // Ensure that all layer inputs have a scale. - for (int i = 0; i < network->getNbLayers(); i++) - { - auto layer = network->getLayer(i); - for (int j = 0; j < layer->getNbInputs(); j++) - { - nvinfer1::ITensor* input{layer->getInput(j)}; - // Optional inputs are nullptr here and are from RNN layers. - if (input != nullptr && !input->dynamicRangeIsSet()) - { - ASSERT(input->setDynamicRange(-inRange, inRange)); - } - } - } - - // Ensure that all layer outputs have a scale. - // Tensors that are also inputs to layers are ingored here - // since the previous loop nest assigned scales to them. - for (int i = 0; i < network->getNbLayers(); i++) - { - auto layer = network->getLayer(i); - for (int j = 0; j < layer->getNbOutputs(); j++) - { - nvinfer1::ITensor* output{layer->getOutput(j)}; - // Optional outputs are nullptr here and are from RNN layers. - if (output != nullptr && !output->dynamicRangeIsSet()) - { - // Pooling must have the same input and output scales. - if (layer->getType() == nvinfer1::LayerType::kPOOLING) - { - ASSERT(output->setDynamicRange(-inRange, inRange)); - } - else - { - ASSERT(output->setDynamicRange(-outRange, outRange)); - } - } - } - } -} - -inline void setDummyInt8DynamicRanges(const nvinfer1::IBuilderConfig* c, nvinfer1::INetworkDefinition* n) -{ - // Set dummy per-tensor dynamic range if Int8 mode is requested. - if (c->getFlag(nvinfer1::BuilderFlag::kINT8)) - { - sample::gLogWarning - << "Int8 calibrator not provided. Generating dummy per-tensor dynamic range. Int8 accuracy is not guaranteed." - << std::endl; - setAllDynamicRanges(n); - } -} - -inline void enableDLA(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, int useDLACore, bool allowGPUFallback = true) -{ - if (useDLACore >= 0) - { - if (builder->getNbDLACores() == 0) - { - std::cerr << "Trying to use DLA core " << useDLACore << " on a platform that doesn't have any DLA cores" - << std::endl; - assert("Error: use DLA core on a platfrom that doesn't have any DLA cores" && false); - } - if (allowGPUFallback) - { - config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK); - } - if (!config->getFlag(nvinfer1::BuilderFlag::kINT8)) - { - // User has not requested INT8 Mode. - // By default run in FP16 mode. FP32 mode is not permitted. - config->setFlag(nvinfer1::BuilderFlag::kFP16); - } - config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA); - config->setDLACore(useDLACore); - } -} - -inline int32_t parseDLA(int32_t argc, char** argv) -{ - for (int32_t i = 1; i < argc; i++) - { - if (strncmp(argv[i], "--useDLACore=", 13) == 0) - { - return std::stoi(argv[i] + 13); - } - } - return -1; -} - -inline uint32_t getElementSize(nvinfer1::DataType t) noexcept -{ - switch (t) - { - case nvinfer1::DataType::kINT32: return 4; - case nvinfer1::DataType::kFLOAT: return 4; - case nvinfer1::DataType::kHALF: return 2; - case nvinfer1::DataType::kBOOL: - case nvinfer1::DataType::kINT8: return 1; - } - return 0; -} - -inline int64_t volume(const nvinfer1::Dims& d) -{ - return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); -} - -template -struct PPM -{ - std::string magic, fileName; - int h, w, max; - uint8_t buffer[C * H * W]; -}; - -// New vPPM(variable sized PPM) class with variable dimensions. -struct vPPM -{ - std::string magic, fileName; - int h, w, max; - std::vector buffer; -}; - -struct BBox -{ - float x1, y1, x2, y2; -}; - -template -void readPPMFile(const std::string& filename, samplesCommon::PPM& ppm) -{ - ppm.fileName = filename; - std::ifstream infile(filename, std::ifstream::binary); - assert(infile.is_open() && "Attempting to read from a file that is not open."); - infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; - infile.seekg(1, infile.cur); - infile.read(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); -} - -inline void readPPMFile(const std::string& filename, vPPM& ppm, std::vector& input_dir) -{ - ppm.fileName = filename; - std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary); - infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; - infile.seekg(1, infile.cur); - - for (int i = 0; i < ppm.w * ppm.h * 3; ++i) - { - ppm.buffer.push_back(0); - } - - infile.read(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); -} - -template -void writePPMFileWithBBox(const std::string& filename, PPM& ppm, const BBox& bbox) -{ - std::ofstream outfile("./" + filename, std::ofstream::binary); - assert(!outfile.fail()); - outfile << "P6" - << "\n" - << ppm.w << " " << ppm.h << "\n" - << ppm.max << "\n"; - - auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; - const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1); - const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1); - const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1); - const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1); - - for (int x = x1; x <= x2; ++x) - { - // bbox top border - ppm.buffer[(y1 * ppm.w + x) * 3] = 255; - ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0; - // bbox bottom border - ppm.buffer[(y2 * ppm.w + x) * 3] = 255; - ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0; - } - - for (int y = y1; y <= y2; ++y) - { - // bbox left border - ppm.buffer[(y * ppm.w + x1) * 3] = 255; - ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0; - // bbox right border - ppm.buffer[(y * ppm.w + x2) * 3] = 255; - ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0; - } - - outfile.write(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); -} - -inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, std::vector& dets) -{ - std::ofstream outfile("./" + filename, std::ofstream::binary); - assert(!outfile.fail()); - outfile << "P6" - << "\n" - << ppm.w << " " << ppm.h << "\n" - << ppm.max << "\n"; - auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; - - for (auto bbox : dets) - { - for (int x = int(bbox.x1); x < int(bbox.x2); ++x) - { - // bbox top border - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255; - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0; - // bbox bottom border - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255; - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0; - } - - for (int y = int(bbox.y1); y < int(bbox.y2); ++y) - { - // bbox left border - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255; - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0; - // bbox right border - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255; - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0; - } - } - - outfile.write(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); -} - -class TimerBase -{ -public: - virtual void start() {} - virtual void stop() {} - float microseconds() const noexcept - { - return mMs * 1000.f; - } - float milliseconds() const noexcept - { - return mMs; - } - float seconds() const noexcept - { - return mMs / 1000.f; - } - void reset() noexcept - { - mMs = 0.f; - } - -protected: - float mMs{0.0f}; -}; - -class GpuTimer : public TimerBase -{ -public: - explicit GpuTimer(cudaStream_t stream) - : mStream(stream) - { - CHECK(cudaEventCreate(&mStart)); - CHECK(cudaEventCreate(&mStop)); - } - ~GpuTimer() - { - CHECK(cudaEventDestroy(mStart)); - CHECK(cudaEventDestroy(mStop)); - } - void start() - { - CHECK(cudaEventRecord(mStart, mStream)); - } - void stop() - { - CHECK(cudaEventRecord(mStop, mStream)); - float ms{0.0f}; - CHECK(cudaEventSynchronize(mStop)); - CHECK(cudaEventElapsedTime(&ms, mStart, mStop)); - mMs += ms; - } - -private: - cudaEvent_t mStart, mStop; - cudaStream_t mStream; -}; // class GpuTimer - -template -class CpuTimer : public TimerBase -{ -public: - using clock_type = Clock; - - void start() - { - mStart = Clock::now(); - } - void stop() - { - mStop = Clock::now(); - mMs += std::chrono::duration{mStop - mStart}.count(); - } - -private: - std::chrono::time_point mStart, mStop; -}; // class CpuTimer - -using PreciseCpuTimer = CpuTimer; - -inline std::vector splitString(std::string str, char delimiter = ',') -{ - std::vector splitVect; - std::stringstream ss(str); - std::string substr; - - while (ss.good()) - { - getline(ss, substr, delimiter); - splitVect.emplace_back(std::move(substr)); - } - return splitVect; -} - -// Return m rounded up to nearest multiple of n -inline int roundUp(int m, int n) -{ - return ((m + n - 1) / n) * n; -} - -inline int getC(const nvinfer1::Dims& d) -{ - return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; -} - -inline int getH(const nvinfer1::Dims& d) -{ - return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; -} - -inline int getW(const nvinfer1::Dims& d) -{ - return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; -} - -inline void loadLibrary(const std::string& path) -{ -#ifdef _MSC_VER - void* handle = LoadLibrary(path.c_str()); -#else - int32_t flags{RTLD_LAZY}; -#if ENABLE_ASAN - // https://github.com/google/sanitizers/issues/89 - // asan doesn't handle module unloading correctly and there are no plans on doing - // so. In order to get proper stack traces, don't delete the shared library on - // close so that asan can resolve the symbols correctly. - flags |= RTLD_NODELETE; -#endif // ENABLE_ASAN - - void* handle = dlopen(path.c_str(), flags); -#endif - if (handle == nullptr) - { -#ifdef _MSC_VER - sample::gLogError << "Could not load plugin library: " << path << std::endl; -#else - sample::gLogError << "Could not load plugin library: " << path << ", due to: " << dlerror() << std::endl; -#endif - } -} - -inline int32_t getSMVersion() -{ - int32_t deviceIndex = 0; - CHECK(cudaGetDevice(&deviceIndex)); - - int32_t major, minor; - CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); - CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); - - return ((major << 8) | minor); -} - -inline bool isSMSafe() -{ - const int32_t smVersion = getSMVersion(); - return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 || - smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807; -} - -inline bool isDataTypeSupported(nvinfer1::DataType dataType) -{ - auto builder = SampleUniquePtr(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); - if (!builder) - { - return false; - } - - if ((dataType == nvinfer1::DataType::kINT8 && !builder->platformHasFastInt8()) - || (dataType == nvinfer1::DataType::kHALF && !builder->platformHasFastFp16())) - { - return false; - } - - return true; -} - -} // namespace samplesCommon - -inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) -{ - os << "("; - for (int i = 0; i < dims.nbDims; ++i) - { - os << (i ? ", " : "") << dims.d[i]; - } - return os << ")"; -} - -#endif // TENSORRT_COMMON_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/half.h b/src/Detector/tensorrt_yolo/common_deprecated/half.h deleted file mode 100644 index 0755c316c..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/half.h +++ /dev/null @@ -1,4302 +0,0 @@ -// half - IEEE 754-based half-precision floating point library. -// -// Copyright (c) 2012-2017 Christian Rau -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the -// Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -/* - * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Version 1.12.0 - -/// \file -/// Main header file for half precision functionality. - -#ifndef HALF_HALF_HPP -#define HALF_HALF_HPP - -/// Combined gcc version number. -#define HALF_GNUC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -// check C++11 language features -#if defined(__clang__) // clang -#if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) -#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 -#endif -#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) -#define HALF_ENABLE_CPP11_CONSTEXPR 1 -#endif -#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) -#define HALF_ENABLE_CPP11_NOEXCEPT 1 -#endif -#if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) -#define HALF_ENABLE_CPP11_USER_LITERALS 1 -#endif -#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) -#define HALF_ENABLE_CPP11_LONG_LONG 1 -#endif -/*#elif defined(__INTEL_COMPILER) //Intel C++ - #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) ???????? - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) ???????? - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) ???????? - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) ???????? - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif*/ -#elif defined(__GNUC__) // gcc -#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L -#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) -#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 -#endif -#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) -#define HALF_ENABLE_CPP11_CONSTEXPR 1 -#endif -#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) -#define HALF_ENABLE_CPP11_NOEXCEPT 1 -#endif -#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) -#define HALF_ENABLE_CPP11_USER_LITERALS 1 -#endif -#if !defined(HALF_ENABLE_CPP11_LONG_LONG) -#define HALF_ENABLE_CPP11_LONG_LONG 1 -#endif -#endif -#elif defined(_MSC_VER) // Visual C++ -#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) -#define HALF_ENABLE_CPP11_CONSTEXPR 1 -#endif -#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) -#define HALF_ENABLE_CPP11_NOEXCEPT 1 -#endif -#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) -#define HALF_ENABLE_CPP11_USER_LITERALS 1 -#endif -#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) -#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 -#endif -#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) -#define HALF_ENABLE_CPP11_LONG_LONG 1 -#endif -#define HALF_POP_WARNINGS 1 -#pragma warning(push) -#pragma warning(disable : 4099 4127 4146) // struct vs class, constant in if, negative unsigned -#endif - -// check C++11 library features -#include -#if defined(_LIBCPP_VERSION) // libc++ -#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 -#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS -#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 -#endif -#ifndef HALF_ENABLE_CPP11_CSTDINT -#define HALF_ENABLE_CPP11_CSTDINT 1 -#endif -#ifndef HALF_ENABLE_CPP11_CMATH -#define HALF_ENABLE_CPP11_CMATH 1 -#endif -#ifndef HALF_ENABLE_CPP11_HASH -#define HALF_ENABLE_CPP11_HASH 1 -#endif -#endif -#elif defined(__GLIBCXX__) // libstdc++ -#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 -#ifdef __clang__ -#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) -#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 -#endif -#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) -#define HALF_ENABLE_CPP11_CSTDINT 1 -#endif -#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) -#define HALF_ENABLE_CPP11_CMATH 1 -#endif -#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) -#define HALF_ENABLE_CPP11_HASH 1 -#endif -#else -#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) -#define HALF_ENABLE_CPP11_CSTDINT 1 -#endif -#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) -#define HALF_ENABLE_CPP11_CMATH 1 -#endif -#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) -#define HALF_ENABLE_CPP11_HASH 1 -#endif -#endif -#endif -#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ -#if _CPPLIB_VER >= 520 -#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS -#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 -#endif -#ifndef HALF_ENABLE_CPP11_CSTDINT -#define HALF_ENABLE_CPP11_CSTDINT 1 -#endif -#ifndef HALF_ENABLE_CPP11_HASH -#define HALF_ENABLE_CPP11_HASH 1 -#endif -#endif -#if _CPPLIB_VER >= 610 -#ifndef HALF_ENABLE_CPP11_CMATH -#define HALF_ENABLE_CPP11_CMATH 1 -#endif -#endif -#endif -#undef HALF_GNUC_VERSION - -// support constexpr -#if HALF_ENABLE_CPP11_CONSTEXPR -#define HALF_CONSTEXPR constexpr -#define HALF_CONSTEXPR_CONST constexpr -#else -#define HALF_CONSTEXPR -#define HALF_CONSTEXPR_CONST const -#endif - -// support noexcept -#if HALF_ENABLE_CPP11_NOEXCEPT -#define HALF_NOEXCEPT noexcept -#define HALF_NOTHROW noexcept -#else -#define HALF_NOEXCEPT -#define HALF_NOTHROW throw() -#endif - -#include -#include -#include -#include -#include -#include -#if HALF_ENABLE_CPP11_TYPE_TRAITS -#include -#endif -#if HALF_ENABLE_CPP11_CSTDINT -#include -#endif -#if HALF_ENABLE_CPP11_HASH -#include -#endif - -/// Default rounding mode. -/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and `float`s as -/// well as for the half_cast() if not specifying a rounding mode explicitly. It can be redefined (before including -/// half.hpp) to one of the standard rounding modes using their respective constants or the equivalent values of -/// `std::float_round_style`: -/// -/// `std::float_round_style` | value | rounding -/// ---------------------------------|-------|------------------------- -/// `std::round_indeterminate` | -1 | fastest (default) -/// `std::round_toward_zero` | 0 | toward zero -/// `std::round_to_nearest` | 1 | to nearest -/// `std::round_toward_infinity` | 2 | toward positive infinity -/// `std::round_toward_neg_infinity` | 3 | toward negative infinity -/// -/// By default this is set to `-1` (`std::round_indeterminate`), which uses truncation (round toward zero, but with -/// overflows set to infinity) and is the fastest rounding mode possible. It can even be set to -/// `std::numeric_limits::round_style` to synchronize the rounding mode with that of the underlying -/// single-precision implementation. -#ifndef HALF_ROUND_STYLE -#define HALF_ROUND_STYLE 1 // = std::round_to_nearest -#endif - -/// Tie-breaking behaviour for round to nearest. -/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. By default this -/// is defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way -/// cases (and thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more -/// IEEE-conformant behaviour is needed. -#ifndef HALF_ROUND_TIES_TO_EVEN -#define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero -#endif - -/// Value signaling overflow. -/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow -/// of an operation, in particular it just evaluates to positive infinity. -#define HUGE_VALH std::numeric_limits::infinity() - -/// Fast half-precision fma function. -/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate -/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all -/// arithmetic operations, this is in fact always the case. -#define FP_FAST_FMAH 1 - -#ifndef FP_ILOGB0 -#define FP_ILOGB0 INT_MIN -#endif -#ifndef FP_ILOGBNAN -#define FP_ILOGBNAN INT_MAX -#endif -#ifndef FP_SUBNORMAL -#define FP_SUBNORMAL 0 -#endif -#ifndef FP_ZERO -#define FP_ZERO 1 -#endif -#ifndef FP_NAN -#define FP_NAN 2 -#endif -#ifndef FP_INFINITE -#define FP_INFINITE 3 -#endif -#ifndef FP_NORMAL -#define FP_NORMAL 4 -#endif - -/// Main namespace for half precision functionality. -/// This namespace contains all the functionality provided by the library. -namespace half_float -{ -class half; - -#if HALF_ENABLE_CPP11_USER_LITERALS -/// Library-defined half-precision literals. -/// Import this namespace to enable half-precision floating point literals: -/// ~~~~{.cpp} -/// using namespace half_float::literal; -/// half_float::half = 4.2_h; -/// ~~~~ -namespace literal -{ -half operator"" _h(long double); -} -#endif - -/// \internal -/// \brief Implementation details. -namespace detail -{ -#if HALF_ENABLE_CPP11_TYPE_TRAITS -/// Conditional type. -template -struct conditional : std::conditional -{ -}; - -/// Helper for tag dispatching. -template -struct bool_type : std::integral_constant -{ -}; -using std::false_type; -using std::true_type; - -/// Type traits for floating point types. -template -struct is_float : std::is_floating_point -{ -}; -#else -/// Conditional type. -template -struct conditional -{ - typedef T type; -}; -template -struct conditional -{ - typedef F type; -}; - -/// Helper for tag dispatching. -template -struct bool_type -{ -}; -typedef bool_type true_type; -typedef bool_type false_type; - -/// Type traits for floating point types. -template -struct is_float : false_type -{ -}; -template -struct is_float : is_float -{ -}; -template -struct is_float : is_float -{ -}; -template -struct is_float : is_float -{ -}; -template <> -struct is_float : true_type -{ -}; -template <> -struct is_float : true_type -{ -}; -template <> -struct is_float : true_type -{ -}; -#endif - -/// Type traits for floating point bits. -template -struct bits -{ - typedef unsigned char type; -}; -template -struct bits : bits -{ -}; -template -struct bits : bits -{ -}; -template -struct bits : bits -{ -}; - -#if HALF_ENABLE_CPP11_CSTDINT -/// Unsigned integer of (at least) 16 bits width. -typedef std::uint_least16_t uint16; - -/// Unsigned integer of (at least) 32 bits width. -template <> -struct bits -{ - typedef std::uint_least32_t type; -}; - -/// Unsigned integer of (at least) 64 bits width. -template <> -struct bits -{ - typedef std::uint_least64_t type; -}; -#else -/// Unsigned integer of (at least) 16 bits width. -typedef unsigned short uint16; - -/// Unsigned integer of (at least) 32 bits width. -template <> -struct bits : conditional::digits >= 32, unsigned int, unsigned long> -{ -}; - -#if HALF_ENABLE_CPP11_LONG_LONG -/// Unsigned integer of (at least) 64 bits width. -template <> -struct bits : conditional::digits >= 64, unsigned long, unsigned long long> -{ -}; -#else -/// Unsigned integer of (at least) 64 bits width. -template <> -struct bits -{ - typedef unsigned long type; -}; -#endif -#endif - -/// Tag type for binary construction. -struct binary_t -{ -}; - -/// Tag for binary construction. -HALF_CONSTEXPR_CONST binary_t binary = binary_t(); - -/// Temporary half-precision expression. -/// This class represents a half-precision expression which just stores a single-precision value internally. -struct expr -{ - /// Conversion constructor. - /// \param f single-precision value to convert - explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} - - /// Conversion to single-precision. - /// \return single precision value representing expression value - HALF_CONSTEXPR operator float() const HALF_NOEXCEPT - { - return value_; - } - -private: - /// Internal expression value stored in single-precision. - float value_; -}; - -/// SFINAE helper for generic half-precision functions. -/// This class template has to be specialized for each valid combination of argument types to provide a corresponding -/// `type` member equivalent to \a T. -/// \tparam T type to return -template -struct enable -{ -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; -template -struct enable -{ - typedef T type; -}; - -/// Return type for specialized generic 2-argument half-precision functions. -/// This class template has to be specialized for each valid combination of argument types to provide a corresponding -/// `type` member denoting the appropriate return type. -/// \tparam T first argument type -/// \tparam U first argument type -template -struct result : enable -{ -}; -template <> -struct result -{ - typedef half type; -}; - -/// \name Classification helpers -/// \{ - -/// Check for infinity. -/// \tparam T argument type (builtin floating point type) -/// \param arg value to query -/// \retval true if infinity -/// \retval false else -template -bool builtin_isinf(T arg) -{ -#if HALF_ENABLE_CPP11_CMATH - return std::isinf(arg); -#elif defined(_MSC_VER) - return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); -#else - return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); -#endif -} - -/// Check for NaN. -/// \tparam T argument type (builtin floating point type) -/// \param arg value to query -/// \retval true if not a number -/// \retval false else -template -bool builtin_isnan(T arg) -{ -#if HALF_ENABLE_CPP11_CMATH - return std::isnan(arg); -#elif defined(_MSC_VER) - return ::_isnan(static_cast(arg)) != 0; -#else - return arg != arg; -#endif -} - -/// Check sign. -/// \tparam T argument type (builtin floating point type) -/// \param arg value to query -/// \retval true if signbit set -/// \retval false else -template -bool builtin_signbit(T arg) -{ -#if HALF_ENABLE_CPP11_CMATH - return std::signbit(arg); -#else - return arg < T() || (arg == T() && T(1) / arg < T()); -#endif -} - -/// \} -/// \name Conversion -/// \{ - -/// Convert IEEE single-precision to half-precision. -/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \param value single-precision value -/// \return binary representation of half-precision value -template -uint16 float2half_impl(float value, true_type) -{ - typedef bits::type uint32; - uint32 bits; // = *reinterpret_cast(&value); //violating strict aliasing! - std::memcpy(&bits, &value, sizeof(float)); - /* uint16 hbits = (bits>>16) & 0x8000; - bits &= 0x7FFFFFFF; - int exp = bits >> 23; - if(exp == 255) - return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); - if(exp > 142) - { - if(R == std::round_toward_infinity) - return hbits | 0x7C00 - (hbits>>15); - if(R == std::round_toward_neg_infinity) - return hbits | 0x7BFF + (hbits>>15); - return hbits | 0x7BFF + (R!=std::round_toward_zero); - } - int g, s; - if(exp > 112) - { - g = (bits>>12) & 1; - s = (bits&0xFFF) != 0; - hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); - } - else if(exp > 101) - { - int i = 125 - exp; - bits = (bits&0x7FFFFF) | 0x800000; - g = (bits>>i) & 1; - s = (bits&((1L<> (i+1); - } - else - { - g = 0; - s = bits != 0; - } - if(R == std::round_to_nearest) - #if HALF_ROUND_TIES_TO_EVEN - hbits += g & (s|hbits); - #else - hbits += g; - #endif - else if(R == std::round_toward_infinity) - hbits += ~(hbits>>15) & (s|g); - else if(R == std::round_toward_neg_infinity) - hbits += (hbits>>15) & (g|s); - */ - static const uint16 base_table[512] = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, - 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, - 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, - 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, - 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800, - 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00}; - static const unsigned char shift_table[512] = {24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13}; - uint16 hbits = base_table[bits >> 23] + static_cast((bits & 0x7FFFFF) >> shift_table[bits >> 23]); - if (R == std::round_to_nearest) - hbits += (((bits & 0x7FFFFF) >> (shift_table[bits >> 23] - 1)) | (((bits >> 23) & 0xFF) == 102)) - & ((hbits & 0x7C00) != 0x7C00) -#if HALF_ROUND_TIES_TO_EVEN - & (((((static_cast(1) << (shift_table[bits >> 23] - 1)) - 1) & bits) != 0) | hbits) -#endif - ; - else if (R == std::round_toward_zero) - hbits -= ((hbits & 0x7FFF) == 0x7C00) & ~shift_table[bits >> 23]; - else if (R == std::round_toward_infinity) - hbits += ((((bits & 0x7FFFFF & ((static_cast(1) << (shift_table[bits >> 23])) - 1)) != 0) - | (((bits >> 23) <= 102) & ((bits >> 23) != 0))) - & (hbits < 0x7C00)) - - ((hbits == 0xFC00) & ((bits >> 23) != 511)); - else if (R == std::round_toward_neg_infinity) - hbits += ((((bits & 0x7FFFFF & ((static_cast(1) << (shift_table[bits >> 23])) - 1)) != 0) - | (((bits >> 23) <= 358) & ((bits >> 23) != 256))) - & (hbits < 0xFC00) & (hbits >> 15)) - - ((hbits == 0x7C00) & ((bits >> 23) != 255)); - return hbits; -} - -/// Convert IEEE double-precision to half-precision. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \param value double-precision value -/// \return binary representation of half-precision value -template -uint16 float2half_impl(double value, true_type) -{ - typedef bits::type uint32; - typedef bits::type uint64; - uint64 bits; // = *reinterpret_cast(&value); //violating strict aliasing! - std::memcpy(&bits, &value, sizeof(double)); - uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; - uint16 hbits = (hi >> 16) & 0x8000; - hi &= 0x7FFFFFFF; - int exp = hi >> 20; - if (exp == 2047) - return hbits | 0x7C00 | (0x3FF & -static_cast((bits & 0xFFFFFFFFFFFFF) != 0)); - if (exp > 1038) - { - if (R == std::round_toward_infinity) - return hbits | 0x7C00 - (hbits >> 15); - if (R == std::round_toward_neg_infinity) - return hbits | 0x7BFF + (hbits >> 15); - return hbits | 0x7BFF + (R != std::round_toward_zero); - } - int g, s = lo != 0; - if (exp > 1008) - { - g = (hi >> 9) & 1; - s |= (hi & 0x1FF) != 0; - hbits |= ((exp - 1008) << 10) | ((hi >> 10) & 0x3FF); - } - else if (exp > 997) - { - int i = 1018 - exp; - hi = (hi & 0xFFFFF) | 0x100000; - g = (hi >> i) & 1; - s |= (hi & ((1L << i) - 1)) != 0; - hbits |= hi >> (i + 1); - } - else - { - g = 0; - s |= hi != 0; - } - if (R == std::round_to_nearest) -#if HALF_ROUND_TIES_TO_EVEN - hbits += g & (s | hbits); -#else - hbits += g; -#endif - else if (R == std::round_toward_infinity) - hbits += ~(hbits >> 15) & (s | g); - else if (R == std::round_toward_neg_infinity) - hbits += (hbits >> 15) & (g | s); - return hbits; -} - -/// Convert non-IEEE floating point to half-precision. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \tparam T source type (builtin floating point type) -/// \param value floating point value -/// \return binary representation of half-precision value -template -uint16 float2half_impl(T value, ...) -{ - uint16 hbits = static_cast(builtin_signbit(value)) << 15; - if (value == T()) - return hbits; - if (builtin_isnan(value)) - return hbits | 0x7FFF; - if (builtin_isinf(value)) - return hbits | 0x7C00; - int exp; - std::frexp(value, &exp); - if (exp > 16) - { - if (R == std::round_toward_infinity) - return hbits | (0x7C00 - (hbits >> 15)); - else if (R == std::round_toward_neg_infinity) - return hbits | (0x7BFF + (hbits >> 15)); - return hbits | (0x7BFF + (R != std::round_toward_zero)); - } - if (exp < -13) - value = std::ldexp(value, 24); - else - { - value = std::ldexp(value, 11 - exp); - hbits |= ((exp + 13) << 10); - } - T ival, frac = std::modf(value, &ival); - hbits += static_cast(std::abs(static_cast(ival))); - if (R == std::round_to_nearest) - { - frac = std::abs(frac); -#if HALF_ROUND_TIES_TO_EVEN - hbits += (frac > T(0.5)) | ((frac == T(0.5)) & hbits); -#else - hbits += frac >= T(0.5); -#endif - } - else if (R == std::round_toward_infinity) - hbits += frac > T(); - else if (R == std::round_toward_neg_infinity) - hbits += frac < T(); - return hbits; -} - -/// Convert floating point to half-precision. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \tparam T source type (builtin floating point type) -/// \param value floating point value -/// \return binary representation of half-precision value -template -uint16 float2half(T value) -{ - return float2half_impl( - value, bool_type < std::numeric_limits::is_iec559 && sizeof(typename bits::type) == sizeof(T) > ()); -} - -/// Convert integer to half-precision floating point. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \tparam S `true` if value negative, `false` else -/// \tparam T type to convert (builtin integer type) -/// \param value non-negative integral value -/// \return binary representation of half-precision value -template -uint16 int2half_impl(T value) -{ -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_integral::value, "int to half conversion only supports builtin integer types"); -#endif - if (S) - value = -value; - uint16 bits = S << 15; - if (value > 0xFFFF) - { - if (R == std::round_toward_infinity) - bits |= 0x7C00 - S; - else if (R == std::round_toward_neg_infinity) - bits |= 0x7BFF + S; - else - bits |= 0x7BFF + (R != std::round_toward_zero); - } - else if (value) - { - uint32_t m = value, exp = 24; - for (; m < 0x400; m <<= 1, --exp) - ; - for (; m > 0x7FF; m >>= 1, ++exp) - ; - bits |= (exp << 10) + m; - if (exp > 24) - { - if (R == std::round_to_nearest) - bits += (value >> (exp - 25)) & 1 -#if HALF_ROUND_TIES_TO_EVEN - & (((((1 << (exp - 25)) - 1) & value) != 0) | bits) -#endif - ; - else if (R == std::round_toward_infinity) - bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & !S; - else if (R == std::round_toward_neg_infinity) - bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & S; - } - } - return bits; -} - -/// Convert integer to half-precision floating point. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \tparam T type to convert (builtin integer type) -/// \param value integral value -/// \return binary representation of half-precision value -template -uint16 int2half(T value) -{ - return (value < 0) ? int2half_impl(value) : int2half_impl(value); -} - -/// Convert half-precision to IEEE single-precision. -/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). -/// \param value binary representation of half-precision value -/// \return single-precision value -inline float half2float_impl(uint16 value, float, true_type) -{ - typedef bits::type uint32; - /* uint32 bits = static_cast(value&0x8000) << 16; - int abs = value & 0x7FFF; - if(abs) - { - bits |= 0x38000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,bits-=0x800000) ; - bits += static_cast(abs) << 13; - } - */ - static const uint32 mantissa_table[2048] = {0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, - 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, - 0x35700000, 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, - 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, 0x36000000, - 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, - 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, 0x36400000, 0x36440000, 0x36480000, - 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, - 0x36700000, 0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, - 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, - 0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, - 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, - 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, - 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000, - 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, - 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000, - 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, - 0x370D0000, 0x370E0000, 0x370F0000, 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, - 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, - 0x371F0000, 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, - 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, 0x37300000, - 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, - 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, 0x37400000, 0x37410000, 0x37420000, - 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, - 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, - 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, - 0x375E0000, 0x375F0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, - 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, - 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, - 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000, - 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, - 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000, - 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, - 0x378E8000, 0x378F0000, 0x378F8000, 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, - 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, - 0x37978000, 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, - 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, 0x37A00000, - 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, - 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, 0x37A80000, 0x37A88000, 0x37A90000, - 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, - 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, - 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, - 0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, - 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, - 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, - 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000, - 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, - 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, - 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, - 0x37D68000, 0x37D70000, 0x37D78000, 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, - 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, - 0x37DF8000, 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, - 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, 0x37E80000, - 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, - 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, 0x37F00000, 0x37F08000, 0x37F10000, - 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, - 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, - 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, - 0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, - 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, - 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, - 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000, - 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, - 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, - 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, - 0x380F4000, 0x380F8000, 0x380FC000, 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, - 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, - 0x3813C000, 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, - 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, 0x38180000, - 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, - 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, 0x381C0000, 0x381C4000, 0x381C8000, - 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, - 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, - 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, - 0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, - 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, - 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, - 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000, - 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, - 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000, - 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, - 0x38334000, 0x38338000, 0x3833C000, 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, - 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, - 0x3837C000, 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, - 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, 0x383C0000, - 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, - 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, 0x38400000, 0x38404000, 0x38408000, - 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, - 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, - 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, - 0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, - 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, - 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, - 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000, - 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, - 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000, - 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, - 0x38574000, 0x38578000, 0x3857C000, 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, - 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, - 0x385BC000, 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, - 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, 0x38600000, - 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, - 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, 0x38640000, 0x38644000, 0x38648000, - 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, - 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, - 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, - 0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, - 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, - 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, - 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000, - 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, - 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000, - 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, - 0x387B4000, 0x387B8000, 0x387BC000, 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, - 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, - 0x387FC000, 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, - 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, 0x38020000, - 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, - 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, 0x38040000, 0x38042000, 0x38044000, - 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, - 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, - 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, - 0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, - 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, - 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, - 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000, - 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, - 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, - 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, - 0x380FA000, 0x380FC000, 0x380FE000, 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, - 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, - 0x3811E000, 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, - 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, 0x38140000, - 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, - 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, 0x38160000, 0x38162000, 0x38164000, - 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, - 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, - 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, - 0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, - 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, - 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, - 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000, - 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, - 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000, - 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, - 0x3821A000, 0x3821C000, 0x3821E000, 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, - 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, - 0x3823E000, 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, - 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, 0x38260000, - 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, - 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, 0x38280000, 0x38282000, 0x38284000, - 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, - 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, - 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, - 0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, - 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, - 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, - 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000, - 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, - 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000, - 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, - 0x3833A000, 0x3833C000, 0x3833E000, 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, - 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, - 0x3835E000, 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, - 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, 0x38380000, - 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, - 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, 0x383A0000, 0x383A2000, 0x383A4000, - 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, - 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, - 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, - 0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, - 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, - 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, - 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000, - 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, - 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000, - 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, - 0x3845A000, 0x3845C000, 0x3845E000, 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, - 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, - 0x3847E000, 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, - 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, 0x384A0000, - 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, - 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, 0x384C0000, 0x384C2000, 0x384C4000, - 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, - 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, - 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, - 0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, - 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, - 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, - 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000, - 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, - 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000, - 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, - 0x3857A000, 0x3857C000, 0x3857E000, 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, - 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, - 0x3859E000, 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, - 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, 0x385C0000, - 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, - 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, 0x385E0000, 0x385E2000, 0x385E4000, - 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, - 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, - 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, - 0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, - 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, - 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, - 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000, - 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, - 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000, - 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, - 0x3869A000, 0x3869C000, 0x3869E000, 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, - 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, - 0x386BE000, 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, - 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, 0x386E0000, - 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, - 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, 0x38700000, 0x38702000, 0x38704000, - 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, - 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, - 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, - 0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, - 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, - 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, - 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000, - 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, - 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, - 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, - 0x387BA000, 0x387BC000, 0x387BE000, 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, - 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, - 0x387DE000, 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, - 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000}; - static const uint32 exponent_table[64] = {0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, - 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, - 0x07800000, 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, - 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, 0x80000000, - 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, - 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, 0x88000000, 0x88800000, 0x89000000, - 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, - 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000}; - static const unsigned short offset_table[64] = {0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 1024, 1024, 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024}; - uint32 bits = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] + exponent_table[value >> 10]; - // return *reinterpret_cast(&bits); //violating strict aliasing! - float out; - std::memcpy(&out, &bits, sizeof(float)); - return out; -} - -/// Convert half-precision to IEEE double-precision. -/// \param value binary representation of half-precision value -/// \return double-precision value -inline double half2float_impl(uint16 value, double, true_type) -{ - typedef bits::type uint32; - typedef bits::type uint64; - uint32 hi = static_cast(value & 0x8000) << 16; - int abs = value & 0x7FFF; - if (abs) - { - hi |= 0x3F000000 << static_cast(abs >= 0x7C00); - for (; abs < 0x400; abs <<= 1, hi -= 0x100000) - ; - hi += static_cast(abs) << 10; - } - uint64 bits = static_cast(hi) << 32; - // return *reinterpret_cast(&bits); //violating strict aliasing! - double out; - std::memcpy(&out, &bits, sizeof(double)); - return out; -} - -/// Convert half-precision to non-IEEE floating point. -/// \tparam T type to convert to (builtin integer type) -/// \param value binary representation of half-precision value -/// \return floating point value -template -T half2float_impl(uint16 value, T, ...) -{ - T out; - int abs = value & 0x7FFF; - if (abs > 0x7C00) - out = std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); - else if (abs == 0x7C00) - out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); - else if (abs > 0x3FF) - out = std::ldexp(static_cast((abs & 0x3FF) | 0x400), (abs >> 10) - 25); - else - out = std::ldexp(static_cast(abs), -24); - return (value & 0x8000) ? -out : out; -} - -/// Convert half-precision to floating point. -/// \tparam T type to convert to (builtin integer type) -/// \param value binary representation of half-precision value -/// \return floating point value -template -T half2float(uint16 value) -{ - return half2float_impl( - value, T(), bool_type < std::numeric_limits::is_iec559 && sizeof(typename bits::type) == sizeof(T) > ()); -} - -/// Convert half-precision floating point to integer. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \tparam E `true` for round to even, `false` for round away from zero -/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign -/// bits) \param value binary representation of half-precision value \return integral value -template -T half2int_impl(uint16 value) -{ -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_integral::value, "half to int conversion only supports builtin integer types"); -#endif - uint32_t e = value & 0x7FFF; - if (e >= 0x7C00) - return (value & 0x8000) ? std::numeric_limits::min() : std::numeric_limits::max(); - if (e < 0x3800) - { - if (R == std::round_toward_infinity) - return T(~(value >> 15) & (e != 0)); - else if (R == std::round_toward_neg_infinity) - return -T(value > 0x8000); - return T(); - } - uint32_t m = (value & 0x3FF) | 0x400; - e >>= 10; - if (e < 25) - { - if (R == std::round_to_nearest) - m += (1 << (24 - e)) - (~(m >> (25 - e)) & E); - else if (R == std::round_toward_infinity) - m += ((value >> 15) - 1) & ((1 << (25 - e)) - 1U); - else if (R == std::round_toward_neg_infinity) - m += -(value >> 15) & ((1 << (25 - e)) - 1U); - m >>= 25 - e; - } - else - m <<= e - 25; - return (value & 0x8000) ? -static_cast(m) : static_cast(m); -} - -/// Convert half-precision floating point to integer. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign -/// bits) \param value binary representation of half-precision value \return integral value -template -T half2int(uint16 value) -{ - return half2int_impl(value); -} - -/// Convert half-precision floating point to integer using round-to-nearest-away-from-zero. -/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign -/// bits) \param value binary representation of half-precision value \return integral value -template -T half2int_up(uint16 value) -{ - return half2int_impl(value); -} - -/// Round half-precision number to nearest integer value. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \tparam E `true` for round to even, `false` for round away from zero -/// \param value binary representation of half-precision value -/// \return half-precision bits for nearest integral value -template -uint16 round_half_impl(uint16 value) -{ - uint32_t e = value & 0x7FFF; - uint16 result = value; - if (e < 0x3C00) - { - result &= 0x8000; - if (R == std::round_to_nearest) - result |= 0x3C00U & -(e >= (0x3800 + E)); - else if (R == std::round_toward_infinity) - result |= 0x3C00U & -(~(value >> 15) & (e != 0)); - else if (R == std::round_toward_neg_infinity) - result |= 0x3C00U & -(value > 0x8000); - } - else if (e < 0x6400) - { - e = 25 - (e >> 10); - uint32_t mask = (1 << e) - 1; - if (R == std::round_to_nearest) - result += (1 << (e - 1)) - (~(result >> e) & E); - else if (R == std::round_toward_infinity) - result += mask & ((value >> 15) - 1); - else if (R == std::round_toward_neg_infinity) - result += mask & -(value >> 15); - result &= ~mask; - } - return result; -} - -/// Round half-precision number to nearest integer value. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding -/// \param value binary representation of half-precision value -/// \return half-precision bits for nearest integral value -template -uint16 round_half(uint16 value) -{ - return round_half_impl(value); -} - -/// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero. -/// \param value binary representation of half-precision value -/// \return half-precision bits for nearest integral value -inline uint16 round_half_up(uint16 value) -{ - return round_half_impl(value); -} -/// \} - -struct functions; -template -struct unary_specialized; -template -struct binary_specialized; -template -struct half_caster; -} // namespace detail - -/// Half-precision floating point type. -/// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and -/// conversions. It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and -/// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations -/// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to -/// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic -/// expressions are kept in single-precision as long as possible (while of course still maintaining a strong -/// half-precision type). -/// -/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and -/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which -/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the -/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be -/// of exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will -/// most probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying -/// 16-bit IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 -/// bits if your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the -/// case on nearly any reasonable platform. -/// -/// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable -/// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. -class half -{ - friend struct detail::functions; - friend struct detail::unary_specialized; - friend struct detail::binary_specialized; - template - friend struct detail::half_caster; - friend class std::numeric_limits; -#if HALF_ENABLE_CPP11_HASH - friend struct std::hash; -#endif -#if HALF_ENABLE_CPP11_USER_LITERALS - friend half literal::operator"" _h(long double); -#endif - -public: - /// Default constructor. - /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics - /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. - HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} - - /// Copy constructor. - /// \tparam T type of concrete half expression - /// \param rhs half expression to copy from - half(detail::expr rhs) - : data_(detail::float2half(static_cast(rhs))) - { - } - - /// Conversion constructor. - /// \param rhs float to convert - explicit half(float rhs) - : data_(detail::float2half(rhs)) - { - } - - /// Conversion to single-precision. - /// \return single precision value representing expression value - operator float() const - { - return detail::half2float(data_); - } - - /// Assignment operator. - /// \tparam T type of concrete half expression - /// \param rhs half expression to copy from - /// \return reference to this half - half& operator=(detail::expr rhs) - { - return *this = static_cast(rhs); - } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to add - /// \return reference to this half - template - typename detail::enable::type operator+=(T rhs) - { - return *this += static_cast(rhs); - } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to subtract - /// \return reference to this half - template - typename detail::enable::type operator-=(T rhs) - { - return *this -= static_cast(rhs); - } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to multiply with - /// \return reference to this half - template - typename detail::enable::type operator*=(T rhs) - { - return *this *= static_cast(rhs); - } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to divide by - /// \return reference to this half - template - typename detail::enable::type operator/=(T rhs) - { - return *this /= static_cast(rhs); - } - - /// Assignment operator. - /// \param rhs single-precision value to copy from - /// \return reference to this half - half& operator=(float rhs) - { - data_ = detail::float2half(rhs); - return *this; - } - - /// Arithmetic assignment. - /// \param rhs single-precision value to add - /// \return reference to this half - half& operator+=(float rhs) - { - data_ = detail::float2half(detail::half2float(data_) + rhs); - return *this; - } - - /// Arithmetic assignment. - /// \param rhs single-precision value to subtract - /// \return reference to this half - half& operator-=(float rhs) - { - data_ = detail::float2half(detail::half2float(data_) - rhs); - return *this; - } - - /// Arithmetic assignment. - /// \param rhs single-precision value to multiply with - /// \return reference to this half - half& operator*=(float rhs) - { - data_ = detail::float2half(detail::half2float(data_) * rhs); - return *this; - } - - /// Arithmetic assignment. - /// \param rhs single-precision value to divide by - /// \return reference to this half - half& operator/=(float rhs) - { - data_ = detail::float2half(detail::half2float(data_) / rhs); - return *this; - } - - /// Prefix increment. - /// \return incremented half value - half& operator++() - { - return *this += 1.0f; - } - - /// Prefix decrement. - /// \return decremented half value - half& operator--() - { - return *this -= 1.0f; - } - - /// Postfix increment. - /// \return non-incremented half value - half operator++(int) - { - half out(*this); - ++*this; - return out; - } - - /// Postfix decrement. - /// \return non-decremented half value - half operator--(int) - { - half out(*this); - --*this; - return out; - } - -private: - /// Rounding mode to use - static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); - - /// Constructor. - /// \param bits binary representation to set half to - HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {} - - /// Internal binary representation - detail::uint16 data_; -}; - -#if HALF_ENABLE_CPP11_USER_LITERALS -namespace literal -{ -/// Half literal. -/// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due -/// to rather involved conversions. -/// \param value literal value -/// \return half with given value (if representable) -inline half operator"" _h(long double value) -{ - return half(detail::binary, detail::float2half(value)); -} -} // namespace literal -#endif - -namespace detail -{ -/// Wrapper implementing unspecialized half-precision functions. -struct functions -{ - /// Addition implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision sum stored in single-precision - static expr plus(float x, float y) - { - return expr(x + y); - } - - /// Subtraction implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision difference stored in single-precision - static expr minus(float x, float y) - { - return expr(x - y); - } - - /// Multiplication implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision product stored in single-precision - static expr multiplies(float x, float y) - { - return expr(x * y); - } - - /// Division implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision quotient stored in single-precision - static expr divides(float x, float y) - { - return expr(x / y); - } - - /// Output implementation. - /// \param out stream to write to - /// \param arg value to write - /// \return reference to stream - template - static std::basic_ostream& write(std::basic_ostream& out, float arg) - { - return out << arg; - } - - /// Input implementation. - /// \param in stream to read from - /// \param arg half to read into - /// \return reference to stream - template - static std::basic_istream& read(std::basic_istream& in, half& arg) - { - float f; - if (in >> f) - arg = f; - return in; - } - - /// Modulo implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision division remainder stored in single-precision - static expr fmod(float x, float y) - { - return expr(std::fmod(x, y)); - } - - /// Remainder implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision division remainder stored in single-precision - static expr remainder(float x, float y) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::remainder(x, y)); -#else - if (builtin_isnan(x) || builtin_isnan(y)) - return expr(std::numeric_limits::quiet_NaN()); - float ax = std::fabs(x), ay = std::fabs(y); - if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) - return expr(std::numeric_limits::quiet_NaN()); - if (ay >= 65536.0f) - return expr(x); - if (ax == ay) - return expr(builtin_signbit(x) ? -0.0f : 0.0f); - ax = std::fmod(ax, ay + ay); - float y2 = 0.5f * ay; - if (ax > y2) - { - ax -= ay; - if (ax >= y2) - ax -= ay; - } - return expr(builtin_signbit(x) ? -ax : ax); -#endif - } - - /// Remainder implementation. - /// \param x first operand - /// \param y second operand - /// \param quo address to store quotient bits at - /// \return Half-precision division remainder stored in single-precision - static expr remquo(float x, float y, int* quo) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::remquo(x, y, quo)); -#else - if (builtin_isnan(x) || builtin_isnan(y)) - return expr(std::numeric_limits::quiet_NaN()); - bool sign = builtin_signbit(x), qsign = static_cast(sign ^ builtin_signbit(y)); - float ax = std::fabs(x), ay = std::fabs(y); - if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) - return expr(std::numeric_limits::quiet_NaN()); - if (ay >= 65536.0f) - return expr(x); - if (ax == ay) - return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f); - ax = std::fmod(ax, 8.0f * ay); - int cquo = 0; - if (ax >= 4.0f * ay) - { - ax -= 4.0f * ay; - cquo += 4; - } - if (ax >= 2.0f * ay) - { - ax -= 2.0f * ay; - cquo += 2; - } - float y2 = 0.5f * ay; - if (ax > y2) - { - ax -= ay; - ++cquo; - if (ax >= y2) - { - ax -= ay; - ++cquo; - } - } - return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax); -#endif - } - - /// Positive difference implementation. - /// \param x first operand - /// \param y second operand - /// \return Positive difference stored in single-precision - static expr fdim(float x, float y) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::fdim(x, y)); -#else - return expr((x <= y) ? 0.0f : (x - y)); -#endif - } - - /// Fused multiply-add implementation. - /// \param x first operand - /// \param y second operand - /// \param z third operand - /// \return \a x * \a y + \a z stored in single-precision - static expr fma(float x, float y, float z) - { -#if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF) - return expr(std::fma(x, y, z)); -#else - return expr(x * y + z); -#endif - } - - /// Get NaN. - /// \return Half-precision quiet NaN - static half nanh() - { - return half(binary, 0x7FFF); - } - - /// Exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr exp(float arg) - { - return expr(std::exp(arg)); - } - - /// Exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr expm1(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::expm1(arg)); -#else - return expr(static_cast(std::exp(static_cast(arg)) - 1.0)); -#endif - } - - /// Binary exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr exp2(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::exp2(arg)); -#else - return expr(static_cast(std::exp(arg * 0.69314718055994530941723212145818))); -#endif - } - - /// Logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log(float arg) - { - return expr(std::log(arg)); - } - - /// Common logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log10(float arg) - { - return expr(std::log10(arg)); - } - - /// Logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log1p(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::log1p(arg)); -#else - return expr(static_cast(std::log(1.0 + arg))); -#endif - } - - /// Binary logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log2(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::log2(arg)); -#else - return expr(static_cast(std::log(static_cast(arg)) * 1.4426950408889634073599246810019)); -#endif - } - - /// Square root implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sqrt(float arg) - { - return expr(std::sqrt(arg)); - } - - /// Cubic root implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cbrt(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::cbrt(arg)); -#else - if (builtin_isnan(arg) || builtin_isinf(arg)) - return expr(arg); - return expr(builtin_signbit(arg) ? -static_cast(std::pow(-static_cast(arg), 1.0 / 3.0)) - : static_cast(std::pow(static_cast(arg), 1.0 / 3.0))); -#endif - } - - /// Hypotenuse implementation. - /// \param x first argument - /// \param y second argument - /// \return function value stored in single-preicision - static expr hypot(float x, float y) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::hypot(x, y)); -#else - return expr((builtin_isinf(x) || builtin_isinf(y)) - ? std::numeric_limits::infinity() - : static_cast(std::sqrt(static_cast(x) * x + static_cast(y) * y))); -#endif - } - - /// Power implementation. - /// \param base value to exponentiate - /// \param exp power to expontiate to - /// \return function value stored in single-preicision - static expr pow(float base, float exp) - { - return expr(std::pow(base, exp)); - } - - /// Sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sin(float arg) - { - return expr(std::sin(arg)); - } - - /// Cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cos(float arg) - { - return expr(std::cos(arg)); - } - - /// Tan implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tan(float arg) - { - return expr(std::tan(arg)); - } - - /// Arc sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr asin(float arg) - { - return expr(std::asin(arg)); - } - - /// Arc cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr acos(float arg) - { - return expr(std::acos(arg)); - } - - /// Arc tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr atan(float arg) - { - return expr(std::atan(arg)); - } - - /// Arc tangent implementation. - /// \param x first argument - /// \param y second argument - /// \return function value stored in single-preicision - static expr atan2(float x, float y) - { - return expr(std::atan2(x, y)); - } - - /// Hyperbolic sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sinh(float arg) - { - return expr(std::sinh(arg)); - } - - /// Hyperbolic cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cosh(float arg) - { - return expr(std::cosh(arg)); - } - - /// Hyperbolic tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tanh(float arg) - { - return expr(std::tanh(arg)); - } - - /// Hyperbolic area sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr asinh(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::asinh(arg)); -#else - return expr((arg == -std::numeric_limits::infinity()) - ? arg - : static_cast(std::log(arg + std::sqrt(arg * arg + 1.0)))); -#endif - } - - /// Hyperbolic area cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr acosh(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::acosh(arg)); -#else - return expr((arg < -1.0f) ? std::numeric_limits::quiet_NaN() - : static_cast(std::log(arg + std::sqrt(arg * arg - 1.0)))); -#endif - } - - /// Hyperbolic area tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr atanh(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::atanh(arg)); -#else - return expr(static_cast(0.5 * std::log((1.0 + arg) / (1.0 - arg)))); -#endif - } - - /// Error function implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr erf(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::erf(arg)); -#else - return expr(static_cast(erf(static_cast(arg)))); -#endif - } - - /// Complementary implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr erfc(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::erfc(arg)); -#else - return expr(static_cast(1.0 - erf(static_cast(arg)))); -#endif - } - - /// Gamma logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr lgamma(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::lgamma(arg)); -#else - if (builtin_isinf(arg)) - return expr(std::numeric_limits::infinity()); - if (arg < 0.0f) - { - float i, f = std::modf(-arg, &i); - if (f == 0.0f) - return expr(std::numeric_limits::infinity()); - return expr(static_cast(1.1447298858494001741434273513531 - - std::log(std::abs(std::sin(3.1415926535897932384626433832795 * f))) - lgamma(1.0 - arg))); - } - return expr(static_cast(lgamma(static_cast(arg)))); -#endif - } - - /// Gamma implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tgamma(float arg) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::tgamma(arg)); -#else - if (arg == 0.0f) - return builtin_signbit(arg) ? expr(-std::numeric_limits::infinity()) - : expr(std::numeric_limits::infinity()); - if (arg < 0.0f) - { - float i, f = std::modf(-arg, &i); - if (f == 0.0f) - return expr(std::numeric_limits::quiet_NaN()); - double value = 3.1415926535897932384626433832795 - / (std::sin(3.1415926535897932384626433832795 * f) * std::exp(lgamma(1.0 - arg))); - return expr(static_cast((std::fmod(i, 2.0f) == 0.0f) ? -value : value)); - } - if (builtin_isinf(arg)) - return expr(arg); - return expr(static_cast(std::exp(lgamma(static_cast(arg))))); -#endif - } - - /// Floor implementation. - /// \param arg value to round - /// \return rounded value - static half floor(half arg) - { - return half(binary, round_half(arg.data_)); - } - - /// Ceiling implementation. - /// \param arg value to round - /// \return rounded value - static half ceil(half arg) - { - return half(binary, round_half(arg.data_)); - } - - /// Truncation implementation. - /// \param arg value to round - /// \return rounded value - static half trunc(half arg) - { - return half(binary, round_half(arg.data_)); - } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static half round(half arg) - { - return half(binary, round_half_up(arg.data_)); - } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long lround(half arg) - { - return detail::half2int_up(arg.data_); - } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static half rint(half arg) - { - return half(binary, round_half(arg.data_)); - } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long lrint(half arg) - { - return detail::half2int(arg.data_); - } - -#if HALF_ENABLE_CPP11_LONG_LONG - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long long llround(half arg) - { - return detail::half2int_up(arg.data_); - } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long long llrint(half arg) - { - return detail::half2int(arg.data_); - } -#endif - - /// Decompression implementation. - /// \param arg number to decompress - /// \param exp address to store exponent at - /// \return normalized significant - static half frexp(half arg, int* exp) - { - int m = arg.data_ & 0x7FFF, e = -14; - if (m >= 0x7C00 || !m) - return *exp = 0, arg; - for (; m < 0x400; m <<= 1, --e) - ; - return *exp = e + (m >> 10), half(binary, (arg.data_ & 0x8000) | 0x3800 | (m & 0x3FF)); - } - - /// Decompression implementation. - /// \param arg number to decompress - /// \param iptr address to store integer part at - /// \return fractional part - static half modf(half arg, half* iptr) - { - uint32_t e = arg.data_ & 0x7FFF; - if (e >= 0x6400) - return *iptr = arg, half(binary, arg.data_ & (0x8000U | -(e > 0x7C00))); - if (e < 0x3C00) - return iptr->data_ = arg.data_ & 0x8000, arg; - e >>= 10; - uint32_t mask = (1 << (25 - e)) - 1, m = arg.data_ & mask; - iptr->data_ = arg.data_ & ~mask; - if (!m) - return half(binary, arg.data_ & 0x8000); - for (; m < 0x400; m <<= 1, --e) - ; - return half(binary, static_cast((arg.data_ & 0x8000) | (e << 10) | (m & 0x3FF))); - } - - /// Scaling implementation. - /// \param arg number to scale - /// \param exp power of two to scale by - /// \return scaled number - static half scalbln(half arg, long exp) - { - uint32_t m = arg.data_ & 0x7FFF; - if (m >= 0x7C00 || !m) - return arg; - for (; m < 0x400; m <<= 1, --exp) - ; - exp += m >> 10; - uint16 value = arg.data_ & 0x8000; - if (exp > 30) - { - if (half::round_style == std::round_toward_zero) - value |= 0x7BFF; - else if (half::round_style == std::round_toward_infinity) - value |= 0x7C00 - (value >> 15); - else if (half::round_style == std::round_toward_neg_infinity) - value |= 0x7BFF + (value >> 15); - else - value |= 0x7C00; - } - else if (exp > 0) - value |= (exp << 10) | (m & 0x3FF); - else if (exp > -11) - { - m = (m & 0x3FF) | 0x400; - if (half::round_style == std::round_to_nearest) - { - m += 1 << -exp; -#if HALF_ROUND_TIES_TO_EVEN - m -= (m >> (1 - exp)) & 1; -#endif - } - else if (half::round_style == std::round_toward_infinity) - m += ((value >> 15) - 1) & ((1 << (1 - exp)) - 1U); - else if (half::round_style == std::round_toward_neg_infinity) - m += -(value >> 15) & ((1 << (1 - exp)) - 1U); - value |= m >> (1 - exp); - } - else if (half::round_style == std::round_toward_infinity) - value -= (value >> 15) - 1; - else if (half::round_style == std::round_toward_neg_infinity) - value += value >> 15; - return half(binary, value); - } - - /// Exponent implementation. - /// \param arg number to query - /// \return floating point exponent - static int ilogb(half arg) - { - int abs = arg.data_ & 0x7FFF; - if (!abs) - return FP_ILOGB0; - if (abs < 0x7C00) - { - int exp = (abs >> 10) - 15; - if (abs < 0x400) - for (; abs < 0x200; abs <<= 1, --exp) - ; - return exp; - } - if (abs > 0x7C00) - return FP_ILOGBNAN; - return INT_MAX; - } - - /// Exponent implementation. - /// \param arg number to query - /// \return floating point exponent - static half logb(half arg) - { - int abs = arg.data_ & 0x7FFF; - if (!abs) - return half(binary, 0xFC00); - if (abs < 0x7C00) - { - int exp = (abs >> 10) - 15; - if (abs < 0x400) - for (; abs < 0x200; abs <<= 1, --exp) - ; - uint16 bits = (exp < 0) << 15; - if (exp) - { - uint32_t m = std::abs(exp) << 6, e = 18; - for (; m < 0x400; m <<= 1, --e) - ; - bits |= (e << 10) + m; - } - return half(binary, bits); - } - if (abs > 0x7C00) - return arg; - return half(binary, 0x7C00); - } - - /// Enumeration implementation. - /// \param from number to increase/decrease - /// \param to direction to enumerate into - /// \return next representable number - static half nextafter(half from, half to) - { - uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; - if (fabs > 0x7C00) - return from; - if (tabs > 0x7C00 || from.data_ == to.data_ || !(fabs | tabs)) - return to; - if (!fabs) - return half(binary, (to.data_ & 0x8000) + 1); - bool lt = ((fabs == from.data_) ? static_cast(fabs) : -static_cast(fabs)) - < ((tabs == to.data_) ? static_cast(tabs) : -static_cast(tabs)); - return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast(lt)) << 1) - 1); - } - - /// Enumeration implementation. - /// \param from number to increase/decrease - /// \param to direction to enumerate into - /// \return next representable number - static half nexttoward(half from, long double to) - { - if (isnan(from)) - return from; - long double lfrom = static_cast(from); - if (builtin_isnan(to) || lfrom == to) - return half(static_cast(to)); - if (!(from.data_ & 0x7FFF)) - return half(binary, (static_cast(builtin_signbit(to)) << 15) + 1); - return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast(lfrom < to)) << 1) - 1); - } - - /// Sign implementation - /// \param x first operand - /// \param y second operand - /// \return composed value - static half copysign(half x, half y) - { - return half(binary, x.data_ ^ ((x.data_ ^ y.data_) & 0x8000)); - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if infinite number - /// \retval false else - static int fpclassify(half arg) - { - uint32_t abs = arg.data_ & 0x7FFF; - return abs - ? ((abs > 0x3FF) ? ((abs >= 0x7C00) ? ((abs > 0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) : FP_SUBNORMAL) - : FP_ZERO; - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if finite number - /// \retval false else - static bool isfinite(half arg) - { - return (arg.data_ & 0x7C00) != 0x7C00; - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if infinite number - /// \retval false else - static bool isinf(half arg) - { - return (arg.data_ & 0x7FFF) == 0x7C00; - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if not a number - /// \retval false else - static bool isnan(half arg) - { - return (arg.data_ & 0x7FFF) > 0x7C00; - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if normal number - /// \retval false else - static bool isnormal(half arg) - { - return ((arg.data_ & 0x7C00) != 0) & ((arg.data_ & 0x7C00) != 0x7C00); - } - - /// Sign bit implementation. - /// \param arg value to check - /// \retval true if signed - /// \retval false if unsigned - static bool signbit(half arg) - { - return (arg.data_ & 0x8000) != 0; - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operands equal - /// \retval false else - static bool isequal(half x, half y) - { - return (x.data_ == y.data_ || !((x.data_ | y.data_) & 0x7FFF)) && !isnan(x); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operands not equal - /// \retval false else - static bool isnotequal(half x, half y) - { - return (x.data_ != y.data_ && ((x.data_ | y.data_) & 0x7FFF)) || isnan(x); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x > \a y - /// \retval false else - static bool isgreater(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs <= 0x7C00 && yabs <= 0x7C00 - && (((xabs == x.data_) ? xabs : -xabs) > ((yabs == y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x >= \a y - /// \retval false else - static bool isgreaterequal(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs <= 0x7C00 && yabs <= 0x7C00 - && (((xabs == x.data_) ? xabs : -xabs) >= ((yabs == y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x < \a y - /// \retval false else - static bool isless(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs <= 0x7C00 && yabs <= 0x7C00 - && (((xabs == x.data_) ? xabs : -xabs) < ((yabs == y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x <= \a y - /// \retval false else - static bool islessequal(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs <= 0x7C00 && yabs <= 0x7C00 - && (((xabs == x.data_) ? xabs : -xabs) <= ((yabs == y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if either \a x > \a y nor \a x < \a y - /// \retval false else - static bool islessgreater(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if (xabs > 0x7C00 || yabs > 0x7C00) - return false; - int a = (xabs == x.data_) ? xabs : -xabs, b = (yabs == y.data_) ? yabs : -yabs; - return a < b || a > b; - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operand unordered - /// \retval false else - static bool isunordered(half x, half y) - { - return isnan(x) || isnan(y); - } - -private: - static double erf(double arg) - { - if (builtin_isinf(arg)) - return (arg < 0.0) ? -1.0 : 1.0; - double x2 = arg * arg, ax2 = 0.147 * x2, - value = std::sqrt(1.0 - std::exp(-x2 * (1.2732395447351626861510701069801 + ax2) / (1.0 + ax2))); - return builtin_signbit(arg) ? -value : value; - } - - static double lgamma(double arg) - { - double v = 1.0; - for (; arg < 8.0; ++arg) - v *= arg; - double w = 1.0 / (arg * arg); - return (((((((-0.02955065359477124183006535947712 * w + 0.00641025641025641025641025641026) * w - + -0.00191752691752691752691752691753) - * w - + 8.4175084175084175084175084175084e-4) - * w - + -5.952380952380952380952380952381e-4) - * w - + 7.9365079365079365079365079365079e-4) - * w - + -0.00277777777777777777777777777778) - * w - + 0.08333333333333333333333333333333) - / arg - + 0.91893853320467274178032973640562 - std::log(v) - arg + (arg - 0.5) * std::log(arg); - } -}; - -/// Wrapper for unary half-precision functions needing specialization for individual argument types. -/// \tparam T argument type -template -struct unary_specialized -{ - /// Negation implementation. - /// \param arg value to negate - /// \return negated value - static HALF_CONSTEXPR half negate(half arg) - { - return half(binary, arg.data_ ^ 0x8000); - } - - /// Absolute value implementation. - /// \param arg function argument - /// \return absolute value - static half fabs(half arg) - { - return half(binary, arg.data_ & 0x7FFF); - } -}; -template <> -struct unary_specialized -{ - static HALF_CONSTEXPR expr negate(float arg) - { - return expr(-arg); - } - static expr fabs(float arg) - { - return expr(std::fabs(arg)); - } -}; - -/// Wrapper for binary half-precision functions needing specialization for individual argument types. -/// \tparam T first argument type -/// \tparam U first argument type -template -struct binary_specialized -{ - /// Minimum implementation. - /// \param x first operand - /// \param y second operand - /// \return minimum value - static expr fmin(float x, float y) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::fmin(x, y)); -#else - if (builtin_isnan(x)) - return expr(y); - if (builtin_isnan(y)) - return expr(x); - return expr(std::min(x, y)); -#endif - } - - /// Maximum implementation. - /// \param x first operand - /// \param y second operand - /// \return maximum value - static expr fmax(float x, float y) - { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::fmax(x, y)); -#else - if (builtin_isnan(x)) - return expr(y); - if (builtin_isnan(y)) - return expr(x); - return expr(std::max(x, y)); -#endif - } -}; -template <> -struct binary_specialized -{ - static half fmin(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if (xabs > 0x7C00) - return y; - if (yabs > 0x7C00) - return x; - return (((xabs == x.data_) ? xabs : -xabs) > ((yabs == y.data_) ? yabs : -yabs)) ? y : x; - } - static half fmax(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if (xabs > 0x7C00) - return y; - if (yabs > 0x7C00) - return x; - return (((xabs == x.data_) ? xabs : -xabs) < ((yabs == y.data_) ? yabs : -yabs)) ? y : x; - } -}; - -/// Helper class for half casts. -/// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member -/// function and a corresponding `type` member denoting its return type. -/// \tparam T destination type -/// \tparam U source type -/// \tparam R rounding mode to use -template -struct half_caster -{ -}; -template -struct half_caster -{ -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); -#endif - - static half cast(U arg) - { - return cast_impl(arg, is_float()); - }; - -private: - static half cast_impl(U arg, true_type) - { - return half(binary, float2half(arg)); - } - static half cast_impl(U arg, false_type) - { - return half(binary, int2half(arg)); - } -}; -template -struct half_caster -{ -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); -#endif - - static T cast(half arg) - { - return cast_impl(arg, is_float()); - } - -private: - static T cast_impl(half arg, true_type) - { - return half2float(arg.data_); - } - static T cast_impl(half arg, false_type) - { - return half2int(arg.data_); - } -}; -template -struct half_caster -{ -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); -#endif - - static T cast(expr arg) - { - return cast_impl(arg, is_float()); - } - -private: - static T cast_impl(float arg, true_type) - { - return static_cast(arg); - } - static T cast_impl(half arg, false_type) - { - return half2int(arg.data_); - } -}; -template -struct half_caster -{ - static half cast(half arg) - { - return arg; - } -}; -template -struct half_caster : half_caster -{ -}; - -/// \name Comparison operators -/// \{ - -/// Comparison for equality. -/// \param x first operand -/// \param y second operand -/// \retval true if operands equal -/// \retval false else -template -typename enable::type operator==(T x, U y) -{ - return functions::isequal(x, y); -} - -/// Comparison for inequality. -/// \param x first operand -/// \param y second operand -/// \retval true if operands not equal -/// \retval false else -template -typename enable::type operator!=(T x, U y) -{ - return functions::isnotequal(x, y); -} - -/// Comparison for less than. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x less than \a y -/// \retval false else -template -typename enable::type operator<(T x, U y) -{ - return functions::isless(x, y); -} - -/// Comparison for greater than. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x greater than \a y -/// \retval false else -template -typename enable::type operator>(T x, U y) -{ - return functions::isgreater(x, y); -} - -/// Comparison for less equal. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x less equal \a y -/// \retval false else -template -typename enable::type operator<=(T x, U y) -{ - return functions::islessequal(x, y); -} - -/// Comparison for greater equal. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x greater equal \a y -/// \retval false else -template -typename enable::type operator>=(T x, U y) -{ - return functions::isgreaterequal(x, y); -} - -/// \} -/// \name Arithmetic operators -/// \{ - -/// Add halfs. -/// \param x left operand -/// \param y right operand -/// \return sum of half expressions -template -typename enable::type operator+(T x, U y) -{ - return functions::plus(x, y); -} - -/// Subtract halfs. -/// \param x left operand -/// \param y right operand -/// \return difference of half expressions -template -typename enable::type operator-(T x, U y) -{ - return functions::minus(x, y); -} - -/// Multiply halfs. -/// \param x left operand -/// \param y right operand -/// \return product of half expressions -template -typename enable::type operator*(T x, U y) -{ - return functions::multiplies(x, y); -} - -/// Divide halfs. -/// \param x left operand -/// \param y right operand -/// \return quotient of half expressions -template -typename enable::type operator/(T x, U y) -{ - return functions::divides(x, y); -} - -/// Identity. -/// \param arg operand -/// \return uncahnged operand -template -HALF_CONSTEXPR typename enable::type operator+(T arg) -{ - return arg; -} - -/// Negation. -/// \param arg operand -/// \return negated operand -template -HALF_CONSTEXPR typename enable::type operator-(T arg) -{ - return unary_specialized::negate(arg); -} - -/// \} -/// \name Input and output -/// \{ - -/// Output operator. -/// \param out output stream to write into -/// \param arg half expression to write -/// \return reference to output stream -template -typename enable&, T>::type operator<<(std::basic_ostream& out, T arg) -{ - return functions::write(out, arg); -} - -/// Input operator. -/// \param in input stream to read from -/// \param arg half to read into -/// \return reference to input stream -template -std::basic_istream& operator>>(std::basic_istream& in, half& arg) -{ - return functions::read(in, arg); -} - -/// \} -/// \name Basic mathematical operations -/// \{ - -/// Absolute value. -/// \param arg operand -/// \return absolute value of \a arg -// template typename enable::type abs(T arg) { return unary_specialized::fabs(arg); } -inline half abs(half arg) -{ - return unary_specialized::fabs(arg); -} -inline expr abs(expr arg) -{ - return unary_specialized::fabs(arg); -} - -/// Absolute value. -/// \param arg operand -/// \return absolute value of \a arg -// template typename enable::type fabs(T arg) { return unary_specialized::fabs(arg); } -inline half fabs(half arg) -{ - return unary_specialized::fabs(arg); -} -inline expr fabs(expr arg) -{ - return unary_specialized::fabs(arg); -} - -/// Remainder of division. -/// \param x first operand -/// \param y second operand -/// \return remainder of floating point division. -// template typename enable::type fmod(T x, U y) { return functions::fmod(x, y); } -inline expr fmod(half x, half y) -{ - return functions::fmod(x, y); -} -inline expr fmod(half x, expr y) -{ - return functions::fmod(x, y); -} -inline expr fmod(expr x, half y) -{ - return functions::fmod(x, y); -} -inline expr fmod(expr x, expr y) -{ - return functions::fmod(x, y); -} - -/// Remainder of division. -/// \param x first operand -/// \param y second operand -/// \return remainder of floating point division. -// template typename enable::type remainder(T x, U y) { return -// functions::remainder(x, y); } -inline expr remainder(half x, half y) -{ - return functions::remainder(x, y); -} -inline expr remainder(half x, expr y) -{ - return functions::remainder(x, y); -} -inline expr remainder(expr x, half y) -{ - return functions::remainder(x, y); -} -inline expr remainder(expr x, expr y) -{ - return functions::remainder(x, y); -} - -/// Remainder of division. -/// \param x first operand -/// \param y second operand -/// \param quo address to store some bits of quotient at -/// \return remainder of floating point division. -// template typename enable::type remquo(T x, U y, int *quo) { return -// functions::remquo(x, y, quo); } -inline expr remquo(half x, half y, int* quo) -{ - return functions::remquo(x, y, quo); -} -inline expr remquo(half x, expr y, int* quo) -{ - return functions::remquo(x, y, quo); -} -inline expr remquo(expr x, half y, int* quo) -{ - return functions::remquo(x, y, quo); -} -inline expr remquo(expr x, expr y, int* quo) -{ - return functions::remquo(x, y, quo); -} - -/// Fused multiply add. -/// \param x first operand -/// \param y second operand -/// \param z third operand -/// \return ( \a x * \a y ) + \a z rounded as one operation. -// template typename enable::type fma(T x, U y, V z) { return -// functions::fma(x, y, z); } -inline expr fma(half x, half y, half z) -{ - return functions::fma(x, y, z); -} -inline expr fma(half x, half y, expr z) -{ - return functions::fma(x, y, z); -} -inline expr fma(half x, expr y, half z) -{ - return functions::fma(x, y, z); -} -inline expr fma(half x, expr y, expr z) -{ - return functions::fma(x, y, z); -} -inline expr fma(expr x, half y, half z) -{ - return functions::fma(x, y, z); -} -inline expr fma(expr x, half y, expr z) -{ - return functions::fma(x, y, z); -} -inline expr fma(expr x, expr y, half z) -{ - return functions::fma(x, y, z); -} -inline expr fma(expr x, expr y, expr z) -{ - return functions::fma(x, y, z); -} - -/// Maximum of half expressions. -/// \param x first operand -/// \param y second operand -/// \return maximum of operands -// template typename result::type fmax(T x, U y) { return -// binary_specialized::fmax(x, y); } -inline half fmax(half x, half y) -{ - return binary_specialized::fmax(x, y); -} -inline expr fmax(half x, expr y) -{ - return binary_specialized::fmax(x, y); -} -inline expr fmax(expr x, half y) -{ - return binary_specialized::fmax(x, y); -} -inline expr fmax(expr x, expr y) -{ - return binary_specialized::fmax(x, y); -} - -/// Minimum of half expressions. -/// \param x first operand -/// \param y second operand -/// \return minimum of operands -// template typename result::type fmin(T x, U y) { return -// binary_specialized::fmin(x, y); } -inline half fmin(half x, half y) -{ - return binary_specialized::fmin(x, y); -} -inline expr fmin(half x, expr y) -{ - return binary_specialized::fmin(x, y); -} -inline expr fmin(expr x, half y) -{ - return binary_specialized::fmin(x, y); -} -inline expr fmin(expr x, expr y) -{ - return binary_specialized::fmin(x, y); -} - -/// Positive difference. -/// \param x first operand -/// \param y second operand -/// \return \a x - \a y or 0 if difference negative -// template typename enable::type fdim(T x, U y) { return functions::fdim(x, y); } -inline expr fdim(half x, half y) -{ - return functions::fdim(x, y); -} -inline expr fdim(half x, expr y) -{ - return functions::fdim(x, y); -} -inline expr fdim(expr x, half y) -{ - return functions::fdim(x, y); -} -inline expr fdim(expr x, expr y) -{ - return functions::fdim(x, y); -} - -/// Get NaN value. -/// \return quiet NaN -inline half nanh(const char*) -{ - return functions::nanh(); -} - -/// \} -/// \name Exponential functions -/// \{ - -/// Exponential function. -/// \param arg function argument -/// \return e raised to \a arg -// template typename enable::type exp(T arg) { return functions::exp(arg); } -inline expr exp(half arg) -{ - return functions::exp(arg); -} -inline expr exp(expr arg) -{ - return functions::exp(arg); -} - -/// Exponential minus one. -/// \param arg function argument -/// \return e raised to \a arg subtracted by 1 -// template typename enable::type expm1(T arg) { return functions::expm1(arg); } -inline expr expm1(half arg) -{ - return functions::expm1(arg); -} -inline expr expm1(expr arg) -{ - return functions::expm1(arg); -} - -/// Binary exponential. -/// \param arg function argument -/// \return 2 raised to \a arg -// template typename enable::type exp2(T arg) { return functions::exp2(arg); } -inline expr exp2(half arg) -{ - return functions::exp2(arg); -} -inline expr exp2(expr arg) -{ - return functions::exp2(arg); -} - -/// Natural logorithm. -/// \param arg function argument -/// \return logarithm of \a arg to base e -// template typename enable::type log(T arg) { return functions::log(arg); } -inline expr log(half arg) -{ - return functions::log(arg); -} -inline expr log(expr arg) -{ - return functions::log(arg); -} - -/// Common logorithm. -/// \param arg function argument -/// \return logarithm of \a arg to base 10 -// template typename enable::type log10(T arg) { return functions::log10(arg); } -inline expr log10(half arg) -{ - return functions::log10(arg); -} -inline expr log10(expr arg) -{ - return functions::log10(arg); -} - -/// Natural logorithm. -/// \param arg function argument -/// \return logarithm of \a arg plus 1 to base e -// template typename enable::type log1p(T arg) { return functions::log1p(arg); } -inline expr log1p(half arg) -{ - return functions::log1p(arg); -} -inline expr log1p(expr arg) -{ - return functions::log1p(arg); -} - -/// Binary logorithm. -/// \param arg function argument -/// \return logarithm of \a arg to base 2 -// template typename enable::type log2(T arg) { return functions::log2(arg); } -inline expr log2(half arg) -{ - return functions::log2(arg); -} -inline expr log2(expr arg) -{ - return functions::log2(arg); -} - -/// \} -/// \name Power functions -/// \{ - -/// Square root. -/// \param arg function argument -/// \return square root of \a arg -// template typename enable::type sqrt(T arg) { return functions::sqrt(arg); } -inline expr sqrt(half arg) -{ - return functions::sqrt(arg); -} -inline expr sqrt(expr arg) -{ - return functions::sqrt(arg); -} - -/// Cubic root. -/// \param arg function argument -/// \return cubic root of \a arg -// template typename enable::type cbrt(T arg) { return functions::cbrt(arg); } -inline expr cbrt(half arg) -{ - return functions::cbrt(arg); -} -inline expr cbrt(expr arg) -{ - return functions::cbrt(arg); -} - -/// Hypotenuse function. -/// \param x first argument -/// \param y second argument -/// \return square root of sum of squares without internal over- or underflows -// template typename enable::type hypot(T x, U y) { return functions::hypot(x, y); -//} -inline expr hypot(half x, half y) -{ - return functions::hypot(x, y); -} -inline expr hypot(half x, expr y) -{ - return functions::hypot(x, y); -} -inline expr hypot(expr x, half y) -{ - return functions::hypot(x, y); -} -inline expr hypot(expr x, expr y) -{ - return functions::hypot(x, y); -} - -/// Power function. -/// \param base first argument -/// \param exp second argument -/// \return \a base raised to \a exp -// template typename enable::type pow(T base, U exp) { return functions::pow(base, -// exp); } -inline expr pow(half base, half exp) -{ - return functions::pow(base, exp); -} -inline expr pow(half base, expr exp) -{ - return functions::pow(base, exp); -} -inline expr pow(expr base, half exp) -{ - return functions::pow(base, exp); -} -inline expr pow(expr base, expr exp) -{ - return functions::pow(base, exp); -} - -/// \} -/// \name Trigonometric functions -/// \{ - -/// Sine function. -/// \param arg function argument -/// \return sine value of \a arg -// template typename enable::type sin(T arg) { return functions::sin(arg); } -inline expr sin(half arg) -{ - return functions::sin(arg); -} -inline expr sin(expr arg) -{ - return functions::sin(arg); -} - -/// Cosine function. -/// \param arg function argument -/// \return cosine value of \a arg -// template typename enable::type cos(T arg) { return functions::cos(arg); } -inline expr cos(half arg) -{ - return functions::cos(arg); -} -inline expr cos(expr arg) -{ - return functions::cos(arg); -} - -/// Tangent function. -/// \param arg function argument -/// \return tangent value of \a arg -// template typename enable::type tan(T arg) { return functions::tan(arg); } -inline expr tan(half arg) -{ - return functions::tan(arg); -} -inline expr tan(expr arg) -{ - return functions::tan(arg); -} - -/// Arc sine. -/// \param arg function argument -/// \return arc sine value of \a arg -// template typename enable::type asin(T arg) { return functions::asin(arg); } -inline expr asin(half arg) -{ - return functions::asin(arg); -} -inline expr asin(expr arg) -{ - return functions::asin(arg); -} - -/// Arc cosine function. -/// \param arg function argument -/// \return arc cosine value of \a arg -// template typename enable::type acos(T arg) { return functions::acos(arg); } -inline expr acos(half arg) -{ - return functions::acos(arg); -} -inline expr acos(expr arg) -{ - return functions::acos(arg); -} - -/// Arc tangent function. -/// \param arg function argument -/// \return arc tangent value of \a arg -// template typename enable::type atan(T arg) { return functions::atan(arg); } -inline expr atan(half arg) -{ - return functions::atan(arg); -} -inline expr atan(expr arg) -{ - return functions::atan(arg); -} - -/// Arc tangent function. -/// \param x first argument -/// \param y second argument -/// \return arc tangent value -// template typename enable::type atan2(T x, U y) { return functions::atan2(x, y); -//} -inline expr atan2(half x, half y) -{ - return functions::atan2(x, y); -} -inline expr atan2(half x, expr y) -{ - return functions::atan2(x, y); -} -inline expr atan2(expr x, half y) -{ - return functions::atan2(x, y); -} -inline expr atan2(expr x, expr y) -{ - return functions::atan2(x, y); -} - -/// \} -/// \name Hyperbolic functions -/// \{ - -/// Hyperbolic sine. -/// \param arg function argument -/// \return hyperbolic sine value of \a arg -// template typename enable::type sinh(T arg) { return functions::sinh(arg); } -inline expr sinh(half arg) -{ - return functions::sinh(arg); -} -inline expr sinh(expr arg) -{ - return functions::sinh(arg); -} - -/// Hyperbolic cosine. -/// \param arg function argument -/// \return hyperbolic cosine value of \a arg -// template typename enable::type cosh(T arg) { return functions::cosh(arg); } -inline expr cosh(half arg) -{ - return functions::cosh(arg); -} -inline expr cosh(expr arg) -{ - return functions::cosh(arg); -} - -/// Hyperbolic tangent. -/// \param arg function argument -/// \return hyperbolic tangent value of \a arg -// template typename enable::type tanh(T arg) { return functions::tanh(arg); } -inline expr tanh(half arg) -{ - return functions::tanh(arg); -} -inline expr tanh(expr arg) -{ - return functions::tanh(arg); -} - -/// Hyperbolic area sine. -/// \param arg function argument -/// \return area sine value of \a arg -// template typename enable::type asinh(T arg) { return functions::asinh(arg); } -inline expr asinh(half arg) -{ - return functions::asinh(arg); -} -inline expr asinh(expr arg) -{ - return functions::asinh(arg); -} - -/// Hyperbolic area cosine. -/// \param arg function argument -/// \return area cosine value of \a arg -// template typename enable::type acosh(T arg) { return functions::acosh(arg); } -inline expr acosh(half arg) -{ - return functions::acosh(arg); -} -inline expr acosh(expr arg) -{ - return functions::acosh(arg); -} - -/// Hyperbolic area tangent. -/// \param arg function argument -/// \return area tangent value of \a arg -// template typename enable::type atanh(T arg) { return functions::atanh(arg); } -inline expr atanh(half arg) -{ - return functions::atanh(arg); -} -inline expr atanh(expr arg) -{ - return functions::atanh(arg); -} - -/// \} -/// \name Error and gamma functions -/// \{ - -/// Error function. -/// \param arg function argument -/// \return error function value of \a arg -// template typename enable::type erf(T arg) { return functions::erf(arg); } -inline expr erf(half arg) -{ - return functions::erf(arg); -} -inline expr erf(expr arg) -{ - return functions::erf(arg); -} - -/// Complementary error function. -/// \param arg function argument -/// \return 1 minus error function value of \a arg -// template typename enable::type erfc(T arg) { return functions::erfc(arg); } -inline expr erfc(half arg) -{ - return functions::erfc(arg); -} -inline expr erfc(expr arg) -{ - return functions::erfc(arg); -} - -/// Natural logarithm of gamma function. -/// \param arg function argument -/// \return natural logarith of gamma function for \a arg -// template typename enable::type lgamma(T arg) { return functions::lgamma(arg); } -inline expr lgamma(half arg) -{ - return functions::lgamma(arg); -} -inline expr lgamma(expr arg) -{ - return functions::lgamma(arg); -} - -/// Gamma function. -/// \param arg function argument -/// \return gamma function value of \a arg -// template typename enable::type tgamma(T arg) { return functions::tgamma(arg); } -inline expr tgamma(half arg) -{ - return functions::tgamma(arg); -} -inline expr tgamma(expr arg) -{ - return functions::tgamma(arg); -} - -/// \} -/// \name Rounding -/// \{ - -/// Nearest integer not less than half value. -/// \param arg half to round -/// \return nearest integer not less than \a arg -// template typename enable::type ceil(T arg) { return functions::ceil(arg); } -inline half ceil(half arg) -{ - return functions::ceil(arg); -} -inline half ceil(expr arg) -{ - return functions::ceil(arg); -} - -/// Nearest integer not greater than half value. -/// \param arg half to round -/// \return nearest integer not greater than \a arg -// template typename enable::type floor(T arg) { return functions::floor(arg); } -inline half floor(half arg) -{ - return functions::floor(arg); -} -inline half floor(expr arg) -{ - return functions::floor(arg); -} - -/// Nearest integer not greater in magnitude than half value. -/// \param arg half to round -/// \return nearest integer not greater in magnitude than \a arg -// template typename enable::type trunc(T arg) { return functions::trunc(arg); } -inline half trunc(half arg) -{ - return functions::trunc(arg); -} -inline half trunc(expr arg) -{ - return functions::trunc(arg); -} - -/// Nearest integer. -/// \param arg half to round -/// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type round(T arg) { return functions::round(arg); } -inline half round(half arg) -{ - return functions::round(arg); -} -inline half round(expr arg) -{ - return functions::round(arg); -} - -/// Nearest integer. -/// \param arg half to round -/// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type lround(T arg) { return functions::lround(arg); } -inline long lround(half arg) -{ - return functions::lround(arg); -} -inline long lround(expr arg) -{ - return functions::lround(arg); -} - -/// Nearest integer using half's internal rounding mode. -/// \param arg half expression to round -/// \return nearest integer using default rounding mode -// template typename enable::type nearbyint(T arg) { return functions::nearbyint(arg); } -inline half nearbyint(half arg) -{ - return functions::rint(arg); -} -inline half nearbyint(expr arg) -{ - return functions::rint(arg); -} - -/// Nearest integer using half's internal rounding mode. -/// \param arg half expression to round -/// \return nearest integer using default rounding mode -// template typename enable::type rint(T arg) { return functions::rint(arg); } -inline half rint(half arg) -{ - return functions::rint(arg); -} -inline half rint(expr arg) -{ - return functions::rint(arg); -} - -/// Nearest integer using half's internal rounding mode. -/// \param arg half expression to round -/// \return nearest integer using default rounding mode -// template typename enable::type lrint(T arg) { return functions::lrint(arg); } -inline long lrint(half arg) -{ - return functions::lrint(arg); -} -inline long lrint(expr arg) -{ - return functions::lrint(arg); -} -#if HALF_ENABLE_CPP11_LONG_LONG -/// Nearest integer. -/// \param arg half to round -/// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type llround(T arg) { return functions::llround(arg); } -inline long long llround(half arg) -{ - return functions::llround(arg); -} -inline long long llround(expr arg) -{ - return functions::llround(arg); -} - -/// Nearest integer using half's internal rounding mode. -/// \param arg half expression to round -/// \return nearest integer using default rounding mode -// template typename enable::type llrint(T arg) { return functions::llrint(arg); } -inline long long llrint(half arg) -{ - return functions::llrint(arg); -} -inline long long llrint(expr arg) -{ - return functions::llrint(arg); -} -#endif - -/// \} -/// \name Floating point manipulation -/// \{ - -/// Decompress floating point number. -/// \param arg number to decompress -/// \param exp address to store exponent at -/// \return significant in range [0.5, 1) -// template typename enable::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); } -inline half frexp(half arg, int* exp) -{ - return functions::frexp(arg, exp); -} -inline half frexp(expr arg, int* exp) -{ - return functions::frexp(arg, exp); -} - -/// Multiply by power of two. -/// \param arg number to modify -/// \param exp power of two to multiply with -/// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); -//} -inline half ldexp(half arg, int exp) -{ - return functions::scalbln(arg, exp); -} -inline half ldexp(expr arg, int exp) -{ - return functions::scalbln(arg, exp); -} - -/// Extract integer and fractional parts. -/// \param arg number to decompress -/// \param iptr address to store integer part at -/// \return fractional part -// template typename enable::type modf(T arg, half *iptr) { return functions::modf(arg, iptr); -//} -inline half modf(half arg, half* iptr) -{ - return functions::modf(arg, iptr); -} -inline half modf(expr arg, half* iptr) -{ - return functions::modf(arg, iptr); -} - -/// Multiply by power of two. -/// \param arg number to modify -/// \param exp power of two to multiply with -/// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); -//} -inline half scalbn(half arg, int exp) -{ - return functions::scalbln(arg, exp); -} -inline half scalbn(expr arg, int exp) -{ - return functions::scalbln(arg, exp); -} - -/// Multiply by power of two. -/// \param arg number to modify -/// \param exp power of two to multiply with -/// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type scalbln(T arg, long exp) { return functions::scalbln(arg, -// exp); -//} -inline half scalbln(half arg, long exp) -{ - return functions::scalbln(arg, exp); -} -inline half scalbln(expr arg, long exp) -{ - return functions::scalbln(arg, exp); -} - -/// Extract exponent. -/// \param arg number to query -/// \return floating point exponent -/// \retval FP_ILOGB0 for zero -/// \retval FP_ILOGBNAN for NaN -/// \retval MAX_INT for infinity -// template typename enable::type ilogb(T arg) { return functions::ilogb(arg); } -inline int ilogb(half arg) -{ - return functions::ilogb(arg); -} -inline int ilogb(expr arg) -{ - return functions::ilogb(arg); -} - -/// Extract exponent. -/// \param arg number to query -/// \return floating point exponent -// template typename enable::type logb(T arg) { return functions::logb(arg); } -inline half logb(half arg) -{ - return functions::logb(arg); -} -inline half logb(expr arg) -{ - return functions::logb(arg); -} - -/// Next representable value. -/// \param from value to compute next representable value for -/// \param to direction towards which to compute next value -/// \return next representable value after \a from in direction towards \a to -// template typename enable::type nextafter(T from, U to) { return -// functions::nextafter(from, to); } -inline half nextafter(half from, half to) -{ - return functions::nextafter(from, to); -} -inline half nextafter(half from, expr to) -{ - return functions::nextafter(from, to); -} -inline half nextafter(expr from, half to) -{ - return functions::nextafter(from, to); -} -inline half nextafter(expr from, expr to) -{ - return functions::nextafter(from, to); -} - -/// Next representable value. -/// \param from value to compute next representable value for -/// \param to direction towards which to compute next value -/// \return next representable value after \a from in direction towards \a to -// template typename enable::type nexttoward(T from, long double to) { return -// functions::nexttoward(from, to); } -inline half nexttoward(half from, long double to) -{ - return functions::nexttoward(from, to); -} -inline half nexttoward(expr from, long double to) -{ - return functions::nexttoward(from, to); -} - -/// Take sign. -/// \param x value to change sign for -/// \param y value to take sign from -/// \return value equal to \a x in magnitude and to \a y in sign -// template typename enable::type copysign(T x, U y) { return -// functions::copysign(x, y); } -inline half copysign(half x, half y) -{ - return functions::copysign(x, y); -} -inline half copysign(half x, expr y) -{ - return functions::copysign(x, y); -} -inline half copysign(expr x, half y) -{ - return functions::copysign(x, y); -} -inline half copysign(expr x, expr y) -{ - return functions::copysign(x, y); -} - -/// \} -/// \name Floating point classification -/// \{ - -/// Classify floating point value. -/// \param arg number to classify -/// \retval FP_ZERO for positive and negative zero -/// \retval FP_SUBNORMAL for subnormal numbers -/// \retval FP_INFINITY for positive and negative infinity -/// \retval FP_NAN for NaNs -/// \retval FP_NORMAL for all other (normal) values -// template typename enable::type fpclassify(T arg) { return functions::fpclassify(arg); } -inline int fpclassify(half arg) -{ - return functions::fpclassify(arg); -} -inline int fpclassify(expr arg) -{ - return functions::fpclassify(arg); -} - -/// Check if finite number. -/// \param arg number to check -/// \retval true if neither infinity nor NaN -/// \retval false else -// template typename enable::type isfinite(T arg) { return functions::isfinite(arg); } -inline bool isfinite(half arg) -{ - return functions::isfinite(arg); -} -inline bool isfinite(expr arg) -{ - return functions::isfinite(arg); -} - -/// Check for infinity. -/// \param arg number to check -/// \retval true for positive or negative infinity -/// \retval false else -// template typename enable::type isinf(T arg) { return functions::isinf(arg); } -inline bool isinf(half arg) -{ - return functions::isinf(arg); -} -inline bool isinf(expr arg) -{ - return functions::isinf(arg); -} - -/// Check for NaN. -/// \param arg number to check -/// \retval true for NaNs -/// \retval false else -// template typename enable::type isnan(T arg) { return functions::isnan(arg); } -inline bool isnan(half arg) -{ - return functions::isnan(arg); -} -inline bool isnan(expr arg) -{ - return functions::isnan(arg); -} - -/// Check if normal number. -/// \param arg number to check -/// \retval true if normal number -/// \retval false if either subnormal, zero, infinity or NaN -// template typename enable::type isnormal(T arg) { return functions::isnormal(arg); } -inline bool isnormal(half arg) -{ - return functions::isnormal(arg); -} -inline bool isnormal(expr arg) -{ - return functions::isnormal(arg); -} - -/// Check sign. -/// \param arg number to check -/// \retval true for negative number -/// \retval false for positive number -// template typename enable::type signbit(T arg) { return functions::signbit(arg); } -inline bool signbit(half arg) -{ - return functions::signbit(arg); -} -inline bool signbit(expr arg) -{ - return functions::signbit(arg); -} - -/// \} -/// \name Comparison -/// \{ - -/// Comparison for greater than. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x greater than \a y -/// \retval false else -// template typename enable::type isgreater(T x, U y) { return -// functions::isgreater(x, y); } -inline bool isgreater(half x, half y) -{ - return functions::isgreater(x, y); -} -inline bool isgreater(half x, expr y) -{ - return functions::isgreater(x, y); -} -inline bool isgreater(expr x, half y) -{ - return functions::isgreater(x, y); -} -inline bool isgreater(expr x, expr y) -{ - return functions::isgreater(x, y); -} - -/// Comparison for greater equal. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x greater equal \a y -/// \retval false else -// template typename enable::type isgreaterequal(T x, U y) { return -// functions::isgreaterequal(x, y); } -inline bool isgreaterequal(half x, half y) -{ - return functions::isgreaterequal(x, y); -} -inline bool isgreaterequal(half x, expr y) -{ - return functions::isgreaterequal(x, y); -} -inline bool isgreaterequal(expr x, half y) -{ - return functions::isgreaterequal(x, y); -} -inline bool isgreaterequal(expr x, expr y) -{ - return functions::isgreaterequal(x, y); -} - -/// Comparison for less than. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x less than \a y -/// \retval false else -// template typename enable::type isless(T x, U y) { return functions::isless(x, -// y); -//} -inline bool isless(half x, half y) -{ - return functions::isless(x, y); -} -inline bool isless(half x, expr y) -{ - return functions::isless(x, y); -} -inline bool isless(expr x, half y) -{ - return functions::isless(x, y); -} -inline bool isless(expr x, expr y) -{ - return functions::isless(x, y); -} - -/// Comparison for less equal. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x less equal \a y -/// \retval false else -// template typename enable::type islessequal(T x, U y) { return -// functions::islessequal(x, y); } -inline bool islessequal(half x, half y) -{ - return functions::islessequal(x, y); -} -inline bool islessequal(half x, expr y) -{ - return functions::islessequal(x, y); -} -inline bool islessequal(expr x, half y) -{ - return functions::islessequal(x, y); -} -inline bool islessequal(expr x, expr y) -{ - return functions::islessequal(x, y); -} - -/// Comarison for less or greater. -/// \param x first operand -/// \param y second operand -/// \retval true if either less or greater -/// \retval false else -// template typename enable::type islessgreater(T x, U y) { return -// functions::islessgreater(x, y); } -inline bool islessgreater(half x, half y) -{ - return functions::islessgreater(x, y); -} -inline bool islessgreater(half x, expr y) -{ - return functions::islessgreater(x, y); -} -inline bool islessgreater(expr x, half y) -{ - return functions::islessgreater(x, y); -} -inline bool islessgreater(expr x, expr y) -{ - return functions::islessgreater(x, y); -} - -/// Check if unordered. -/// \param x first operand -/// \param y second operand -/// \retval true if unordered (one or two NaN operands) -/// \retval false else -// template typename enable::type isunordered(T x, U y) { return -// functions::isunordered(x, y); } -inline bool isunordered(half x, half y) -{ - return functions::isunordered(x, y); -} -inline bool isunordered(half x, expr y) -{ - return functions::isunordered(x, y); -} -inline bool isunordered(expr x, half y) -{ - return functions::isunordered(x, y); -} -inline bool isunordered(expr x, expr y) -{ - return functions::isunordered(x, y); -} - -/// \name Casting -/// \{ - -/// Cast to or from half-precision floating point number. -/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted -/// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. -/// It uses the default rounding mode. -/// -/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types -/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler -/// error and casting between [half](\ref half_float::half)s is just a no-op. -/// \tparam T destination type (half or built-in arithmetic type) -/// \tparam U source type (half or built-in arithmetic type) -/// \param arg value to cast -/// \return \a arg converted to destination type -template -T half_cast(U arg) -{ - return half_caster::cast(arg); -} - -/// Cast to or from half-precision floating point number. -/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted -/// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. -/// -/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types -/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler -/// error and casting between [half](\ref half_float::half)s is just a no-op. -/// \tparam T destination type (half or built-in arithmetic type) -/// \tparam R rounding mode to use. -/// \tparam U source type (half or built-in arithmetic type) -/// \param arg value to cast -/// \return \a arg converted to destination type -template -T half_cast(U arg) -{ - return half_caster::cast(arg); -} -/// \} -} // namespace detail - -using detail::operator==; -using detail::operator!=; -using detail::operator<; -using detail::operator>; -using detail::operator<=; -using detail::operator>=; -using detail::operator+; -using detail::operator-; -using detail::operator*; -using detail::operator/; -using detail::operator<<; -using detail::operator>>; - -using detail::abs; -using detail::acos; -using detail::acosh; -using detail::asin; -using detail::asinh; -using detail::atan; -using detail::atan2; -using detail::atanh; -using detail::cbrt; -using detail::ceil; -using detail::cos; -using detail::cosh; -using detail::erf; -using detail::erfc; -using detail::exp; -using detail::exp2; -using detail::expm1; -using detail::fabs; -using detail::fdim; -using detail::floor; -using detail::fma; -using detail::fmax; -using detail::fmin; -using detail::fmod; -using detail::hypot; -using detail::lgamma; -using detail::log; -using detail::log10; -using detail::log1p; -using detail::log2; -using detail::lrint; -using detail::lround; -using detail::nanh; -using detail::nearbyint; -using detail::pow; -using detail::remainder; -using detail::remquo; -using detail::rint; -using detail::round; -using detail::sin; -using detail::sinh; -using detail::sqrt; -using detail::tan; -using detail::tanh; -using detail::tgamma; -using detail::trunc; -#if HALF_ENABLE_CPP11_LONG_LONG -using detail::llrint; -using detail::llround; -#endif -using detail::copysign; -using detail::fpclassify; -using detail::frexp; -using detail::ilogb; -using detail::isfinite; -using detail::isgreater; -using detail::isgreaterequal; -using detail::isinf; -using detail::isless; -using detail::islessequal; -using detail::islessgreater; -using detail::isnan; -using detail::isnormal; -using detail::isunordered; -using detail::ldexp; -using detail::logb; -using detail::modf; -using detail::nextafter; -using detail::nexttoward; -using detail::scalbln; -using detail::scalbn; -using detail::signbit; - -using detail::half_cast; -} // namespace half_float - -/// Extensions to the C++ standard library. -namespace std -{ -/// Numeric limits for half-precision floats. -/// Because of the underlying single-precision implementation of many operations, it inherits some properties from -/// `std::numeric_limits`. -template <> -class numeric_limits : public numeric_limits -{ -public: - /// Supports signed values. - static HALF_CONSTEXPR_CONST bool is_signed = true; - - /// Is not exact. - static HALF_CONSTEXPR_CONST bool is_exact = false; - - /// Doesn't provide modulo arithmetic. - static HALF_CONSTEXPR_CONST bool is_modulo = false; - - /// IEEE conformant. - static HALF_CONSTEXPR_CONST bool is_iec559 = true; - - /// Supports infinity. - static HALF_CONSTEXPR_CONST bool has_infinity = true; - - /// Supports quiet NaNs. - static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; - - /// Supports subnormal values. - static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; - - /// Rounding mode. - /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying - /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding - /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the - /// single-precision rounding mode. - static HALF_CONSTEXPR_CONST float_round_style round_style - = (std::numeric_limits::round_style == half_float::half::round_style) ? half_float::half::round_style - : round_indeterminate; - - /// Significant digits. - static HALF_CONSTEXPR_CONST int digits = 11; - - /// Significant decimal digits. - static HALF_CONSTEXPR_CONST int digits10 = 3; - - /// Required decimal digits to represent all possible values. - static HALF_CONSTEXPR_CONST int max_digits10 = 5; - - /// Number base. - static HALF_CONSTEXPR_CONST int radix = 2; - - /// One more than smallest exponent. - static HALF_CONSTEXPR_CONST int min_exponent = -13; - - /// Smallest normalized representable power of 10. - static HALF_CONSTEXPR_CONST int min_exponent10 = -4; - - /// One more than largest exponent - static HALF_CONSTEXPR_CONST int max_exponent = 16; - - /// Largest finitely representable power of 10. - static HALF_CONSTEXPR_CONST int max_exponent10 = 4; - - /// Smallest positive normal value. - static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, 0x0400); - } - - /// Smallest finite value. - static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, 0xFBFF); - } - - /// Largest finite value. - static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, 0x7BFF); - } - - /// Difference between one and next representable value. - static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, 0x1400); - } - - /// Maximum rounding error. - static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, (round_style == std::round_to_nearest) ? 0x3800 : 0x3C00); - } - - /// Positive infinity. - static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, 0x7C00); - } - - /// Quiet NaN. - static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, 0x7FFF); - } - - /// Signalling NaN. - static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, 0x7DFF); - } - - /// Smallest positive subnormal value. - static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW - { - return half_float::half(half_float::detail::binary, 0x0001); - } -}; - -#if HALF_ENABLE_CPP11_HASH -/// Hash function for half-precision floats. -/// This is only defined if C++11 `std::hash` is supported and enabled. -template <> -struct hash //: unary_function -{ - /// Type of function argument. - typedef half_float::half argument_type; - - /// Function return type. - typedef size_t result_type; - - /// Compute hash function. - /// \param arg half to hash - /// \return hash value - result_type operator()(argument_type arg) const - { - return hash()(static_cast(arg.data_) & -(arg.data_ != 0x8000)); - } -}; -#endif -} // namespace std - -#undef HALF_CONSTEXPR -#undef HALF_CONSTEXPR_CONST -#undef HALF_NOEXCEPT -#undef HALF_NOTHROW -#ifdef HALF_POP_WARNINGS -#pragma warning(pop) -#undef HALF_POP_WARNINGS -#endif - -#endif diff --git a/src/Detector/tensorrt_yolo/common_deprecated/logger.cpp b/src/Detector/tensorrt_yolo/common_deprecated/logger.cpp deleted file mode 100644 index 03c643984..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/logger.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "logger.h" -#include "ErrorRecorder.h" -#include "logging.h" - -SampleErrorRecorder gRecorder; -namespace sample -{ -Logger gLogger{Logger::Severity::kINFO}; -LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; -LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; -LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; -LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; -LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; - -void setReportableSeverity(Logger::Severity severity) -{ - gLogger.setReportableSeverity(severity); - gLogVerbose.setReportableSeverity(severity); - gLogInfo.setReportableSeverity(severity); - gLogWarning.setReportableSeverity(severity); - gLogError.setReportableSeverity(severity); - gLogFatal.setReportableSeverity(severity); -} -} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common_deprecated/logger.h b/src/Detector/tensorrt_yolo/common_deprecated/logger.h deleted file mode 100644 index 3069e8e90..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/logger.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LOGGER_H -#define LOGGER_H - -#include "logging.h" - -class SampleErrorRecorder; -extern SampleErrorRecorder gRecorder; -namespace sample -{ -extern Logger gLogger; -extern LogStreamConsumer gLogVerbose; -extern LogStreamConsumer gLogInfo; -extern LogStreamConsumer gLogWarning; -extern LogStreamConsumer gLogError; -extern LogStreamConsumer gLogFatal; - -void setReportableSeverity(Logger::Severity severity); -} // namespace sample - -#endif // LOGGER_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/logging.h b/src/Detector/tensorrt_yolo/common_deprecated/logging.h deleted file mode 100644 index 78732c10f..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/logging.h +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TENSORRT_LOGGING_H -#define TENSORRT_LOGGING_H - -#include "NvInferRuntimeCommon.h" -#include "sampleOptions.h" -#include -#include -#include -#include -#include -#include -#include -#include - -namespace sample -{ - -using Severity = nvinfer1::ILogger::Severity; - -class LogStreamConsumerBuffer : public std::stringbuf -{ -public: - LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog) - : mOutput(stream) - , mPrefix(prefix) - , mShouldLog(shouldLog) - { - } - - LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept - : mOutput(other.mOutput) - , mPrefix(other.mPrefix) - , mShouldLog(other.mShouldLog) - { - } - LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete; - LogStreamConsumerBuffer() = delete; - LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete; - LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete; - - ~LogStreamConsumerBuffer() override - { - // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence - // std::streambuf::pptr() gives a pointer to the current position of the output sequence - // if the pointer to the beginning is not equal to the pointer to the current position, - // call putOutput() to log the output to the stream - if (pbase() != pptr()) - { - putOutput(); - } - } - - //! - //! synchronizes the stream buffer and returns 0 on success - //! synchronizing the stream buffer consists of inserting the buffer contents into the stream, - //! resetting the buffer and flushing the stream - //! - int32_t sync() override - { - putOutput(); - return 0; - } - - void putOutput() - { - if (mShouldLog) - { - // prepend timestamp - std::time_t timestamp = std::time(nullptr); - tm* tm_local = std::localtime(×tamp); - mOutput << "["; - mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; - mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; - // std::stringbuf::str() gets the string contents of the buffer - // insert the buffer contents pre-appended by the appropriate prefix into the stream - mOutput << mPrefix << str(); - } - // set the buffer to empty - str(""); - // flush the stream - mOutput.flush(); - } - - void setShouldLog(bool shouldLog) - { - mShouldLog = shouldLog; - } - -private: - std::ostream& mOutput; - std::string mPrefix; - bool mShouldLog{}; -}; // class LogStreamConsumerBuffer - -//! -//! \class LogStreamConsumerBase -//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer -//! -class LogStreamConsumerBase -{ -public: - LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog) - : mBuffer(stream, prefix, shouldLog) - { - } - -protected: - std::mutex mLogMutex; - LogStreamConsumerBuffer mBuffer; -}; // class LogStreamConsumerBase - -//! -//! \class LogStreamConsumer -//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages. -//! Order of base classes is LogStreamConsumerBase and then std::ostream. -//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field -//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream. -//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. -//! Please do not change the order of the parent classes. -//! -class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream -{ -public: - //! - //! \brief Creates a LogStreamConsumer which logs messages with level severity. - //! Reportable severity determines if the messages are severe enough to be logged. - //! - LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity, nvinfer1::ILogger::Severity severity) - : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) - , std::ostream(&mBuffer) // links the stream buffer with the stream - , mShouldLog(severity <= reportableSeverity) - , mSeverity(severity) - { - } - - LogStreamConsumer(LogStreamConsumer&& other) noexcept - : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) - , std::ostream(&mBuffer) // links the stream buffer with the stream - , mShouldLog(other.mShouldLog) - , mSeverity(other.mSeverity) - { - } - LogStreamConsumer(const LogStreamConsumer& other) = delete; - LogStreamConsumer() = delete; - ~LogStreamConsumer() = default; - LogStreamConsumer& operator=(const LogStreamConsumer&) = delete; - LogStreamConsumer& operator=(LogStreamConsumer&&) = delete; - - void setReportableSeverity(Severity reportableSeverity) - { - mShouldLog = mSeverity <= reportableSeverity; - mBuffer.setShouldLog(mShouldLog); - } - - std::mutex& getMutex() - { - return mLogMutex; - } - - bool getShouldLog() const - { - return mShouldLog; - } - -private: - static std::ostream& severityOstream(Severity severity) - { - return severity >= Severity::kINFO ? std::cout : std::cerr; - } - - static std::string severityPrefix(Severity severity) - { - switch (severity) - { - case Severity::kINTERNAL_ERROR: return "[F] "; - case Severity::kERROR: return "[E] "; - case Severity::kWARNING: return "[W] "; - case Severity::kINFO: return "[I] "; - case Severity::kVERBOSE: return "[V] "; - default: assert(0); return ""; - } - } - - bool mShouldLog; - Severity mSeverity; -}; // class LogStreamConsumer - -template -LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) -{ - if (logger.getShouldLog()) - { - std::lock_guard guard(logger.getMutex()); - auto& os = static_cast(logger); - os << obj; - } - return logger; -} - -//! -//! Special handling std::endl -//! -inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, std::ostream& (*f)(std::ostream&) ) -{ - if (logger.getShouldLog()) - { - std::lock_guard guard(logger.getMutex()); - auto& os = static_cast(logger); - os << f; - } - return logger; -} - -inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, const nvinfer1::Dims& dims) -{ - if (logger.getShouldLog()) - { - std::lock_guard guard(logger.getMutex()); - auto& os = static_cast(logger); - for (int32_t i = 0; i < dims.nbDims; ++i) - { - os << (i ? "x" : "") << dims.d[i]; - } - } - return logger; -} - -//! -//! \class Logger -//! -//! \brief Class which manages logging of TensorRT tools and samples -//! -//! \details This class provides a common interface for TensorRT tools and samples to log information to the console, -//! and supports logging two types of messages: -//! -//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal) -//! - Test pass/fail messages -//! -//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is -//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location. -//! -//! In the future, this class could be extended to support dumping test results to a file in some standard format -//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run). -//! -//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger -//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT -//! library and messages coming from the sample. -//! -//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the -//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger -//! object. -//! -class Logger : public nvinfer1::ILogger -{ -public: - explicit Logger(Severity severity = Severity::kWARNING) - : mReportableSeverity(severity) - { - } - - //! - //! \enum TestResult - //! \brief Represents the state of a given test - //! - enum class TestResult - { - kRUNNING, //!< The test is running - kPASSED, //!< The test passed - kFAILED, //!< The test failed - kWAIVED //!< The test was waived - }; - - //! - //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger - //! \return The nvinfer1::ILogger associated with this Logger - //! - //! TODO Once all samples are updated to use this method to register the logger with TensorRT, - //! we can eliminate the inheritance of Logger from ILogger - //! - nvinfer1::ILogger& getTRTLogger() noexcept - { - return *this; - } - - //! - //! \brief Implementation of the nvinfer1::ILogger::log() virtual method - //! - //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the - //! inheritance from nvinfer1::ILogger - //! - void log(Severity severity, const char* msg) noexcept override - { - LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; - } - - //! - //! \brief Method for controlling the verbosity of logging output - //! - //! \param severity The logger will only emit messages that have severity of this level or higher. - //! - void setReportableSeverity(Severity severity) noexcept - { - mReportableSeverity = severity; - } - - //! - //! \brief Opaque handle that holds logging information for a particular test - //! - //! This object is an opaque handle to information used by the Logger to print test results. - //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used - //! with Logger::reportTest{Start,End}(). - //! - class TestAtom - { - public: - TestAtom(TestAtom&&) = default; - - private: - friend class Logger; - - TestAtom(bool started, const std::string& name, const std::string& cmdline) - : mStarted(started) - , mName(name) - , mCmdline(cmdline) - { - } - - bool mStarted; - std::string mName; - std::string mCmdline; - }; - - //! - //! \brief Define a test for logging - //! - //! \param[in] name The name of the test. This should be a string starting with - //! "TensorRT" and containing dot-separated strings containing - //! the characters [A-Za-z0-9_]. - //! For example, "TensorRT.sample_googlenet" - //! \param[in] cmdline The command line used to reproduce the test - // - //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). - //! - static TestAtom defineTest(const std::string& name, const std::string& cmdline) - { - return TestAtom(false, name, cmdline); - } - - //! - //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments - //! as input - //! - //! \param[in] name The name of the test - //! \param[in] argc The number of command-line arguments - //! \param[in] argv The array of command-line arguments (given as C strings) - //! - //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). - //! - static TestAtom defineTest(const std::string& name, int32_t argc, char const* const* argv) - { - // Append TensorRT version as info - const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]"; - auto cmdline = genCmdlineString(argc, argv); - return defineTest(vname, cmdline); - } - - //! - //! \brief Report that a test has started. - //! - //! \pre reportTestStart() has not been called yet for the given testAtom - //! - //! \param[in] testAtom The handle to the test that has started - //! - static void reportTestStart(TestAtom& testAtom) - { - reportTestResult(testAtom, TestResult::kRUNNING); - assert(!testAtom.mStarted); - testAtom.mStarted = true; - } - - //! - //! \brief Report that a test has ended. - //! - //! \pre reportTestStart() has been called for the given testAtom - //! - //! \param[in] testAtom The handle to the test that has ended - //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, - //! TestResult::kFAILED, TestResult::kWAIVED - //! - static void reportTestEnd(TestAtom const& testAtom, TestResult result) - { - assert(result != TestResult::kRUNNING); - assert(testAtom.mStarted); - reportTestResult(testAtom, result); - } - - static int32_t reportPass(TestAtom const& testAtom) - { - reportTestEnd(testAtom, TestResult::kPASSED); - return EXIT_SUCCESS; - } - - static int32_t reportFail(TestAtom const& testAtom) - { - reportTestEnd(testAtom, TestResult::kFAILED); - return EXIT_FAILURE; - } - - static int32_t reportWaive(TestAtom const& testAtom) - { - reportTestEnd(testAtom, TestResult::kWAIVED); - return EXIT_SUCCESS; - } - - static int32_t reportTest(TestAtom const& testAtom, bool pass) - { - return pass ? reportPass(testAtom) : reportFail(testAtom); - } - - Severity getReportableSeverity() const - { - return mReportableSeverity; - } - -private: - //! - //! \brief returns an appropriate string for prefixing a log message with the given severity - //! - static const char* severityPrefix(Severity severity) - { - switch (severity) - { - case Severity::kINTERNAL_ERROR: return "[F] "; - case Severity::kERROR: return "[E] "; - case Severity::kWARNING: return "[W] "; - case Severity::kINFO: return "[I] "; - case Severity::kVERBOSE: return "[V] "; - default: assert(0); return ""; - } - } - - //! - //! \brief returns an appropriate string for prefixing a test result message with the given result - //! - static const char* testResultString(TestResult result) - { - switch (result) - { - case TestResult::kRUNNING: return "RUNNING"; - case TestResult::kPASSED: return "PASSED"; - case TestResult::kFAILED: return "FAILED"; - case TestResult::kWAIVED: return "WAIVED"; - default: assert(0); return ""; - } - } - - //! - //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity - //! - static std::ostream& severityOstream(Severity severity) - { - return severity >= Severity::kINFO ? std::cout : std::cerr; - } - - //! - //! \brief method that implements logging test results - //! - static void reportTestResult(TestAtom const& testAtom, TestResult result) - { - severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " - << testAtom.mCmdline << std::endl; - } - - //! - //! \brief generate a command line string from the given (argc, argv) values - //! - static std::string genCmdlineString(int32_t argc, char const* const* argv) - { - std::stringstream ss; - for (int32_t i = 0; i < argc; i++) - { - if (i > 0) - { - ss << " "; - } - ss << argv[i]; - } - return ss.str(); - } - - Severity mReportableSeverity; -}; // class Logger - -namespace -{ -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE -//! -//! Example usage: -//! -//! LOG_VERBOSE(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) -{ - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); -} - -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO -//! -//! Example usage: -//! -//! LOG_INFO(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_INFO(const Logger& logger) -{ - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); -} - -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING -//! -//! Example usage: -//! -//! LOG_WARN(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_WARN(const Logger& logger) -{ - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); -} - -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR -//! -//! Example usage: -//! -//! LOG_ERROR(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_ERROR(const Logger& logger) -{ - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); -} - -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR -//! ("fatal" severity) -//! -//! Example usage: -//! -//! LOG_FATAL(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_FATAL(const Logger& logger) -{ - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); -} -} // anonymous namespace -} // namespace sample -#endif // TENSORRT_LOGGING_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/parserOnnxConfig.h b/src/Detector/tensorrt_yolo/common_deprecated/parserOnnxConfig.h deleted file mode 100644 index c92a14202..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/parserOnnxConfig.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PARSER_ONNX_CONFIG_H -#define PARSER_ONNX_CONFIG_H - -#include -#include -#include - -#include "NvInfer.h" -#include "NvOnnxConfig.h" -#include "NvOnnxParser.h" - -#define ONNX_DEBUG 1 - -/** - * \class ParserOnnxConfig - * \brief Configuration Manager Class Concrete Implementation - * - * \note: - * - */ - -using namespace std; - -class ParserOnnxConfig : public nvonnxparser::IOnnxConfig -{ - -protected: - string mModelFilename{}; - string mTextFilename{}; - string mFullTextFilename{}; - nvinfer1::DataType mModelDtype; - nvonnxparser::IOnnxConfig::Verbosity mVerbosity; - bool mPrintLayercInfo; - -public: - ParserOnnxConfig() - : mModelDtype(nvinfer1::DataType::kFLOAT) - , mVerbosity(static_cast(nvinfer1::ILogger::Severity::kWARNING)) - , mPrintLayercInfo(false) - { -#ifdef ONNX_DEBUG - if (isDebug()) - { - std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl; - } -#endif - } - -protected: - ~ParserOnnxConfig() - { -#ifdef ONNX_DEBUG - if (isDebug()) - { - std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl; - } -#endif - } - -public: - virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept - { - mModelDtype = modelDtype; - } - - virtual nvinfer1::DataType getModelDtype() const noexcept - { - return mModelDtype; - } - - virtual const char* getModelFileName() const noexcept - { - return mModelFilename.c_str(); - } - virtual void setModelFileName(const char* onnxFilename) noexcept - { - mModelFilename = string(onnxFilename); - } - virtual nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept - { - return mVerbosity; - } - virtual void addVerbosity() noexcept - { - ++mVerbosity; - } - virtual void reduceVerbosity() noexcept - { - --mVerbosity; - } - virtual void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept - { - mVerbosity = verbosity; - } - - virtual const char* getTextFileName() const noexcept - { - return mTextFilename.c_str(); - } - virtual void setTextFileName(const char* textFilename) noexcept - { - mTextFilename = string(textFilename); - } - virtual const char* getFullTextFileName() const noexcept - { - return mFullTextFilename.c_str(); - } - virtual void setFullTextFileName(const char* fullTextFilename) noexcept - { - mFullTextFilename = string(fullTextFilename); - } - virtual bool getPrintLayerInfo() const noexcept - { - return mPrintLayercInfo; - } - virtual void setPrintLayerInfo(bool src) noexcept - { - mPrintLayercInfo = src; - } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() - - virtual bool isDebug() const noexcept - { -#if ONNX_DEBUG - return (std::getenv("ONNX_DEBUG") ? true : false); -#else - return false; -#endif - } - - virtual void destroy() noexcept - { - delete this; - } - -}; // class ParserOnnxConfig - -#endif diff --git a/src/Detector/tensorrt_yolo/common_deprecated/safeCommon.h b/src/Detector/tensorrt_yolo/common_deprecated/safeCommon.h deleted file mode 100644 index 3d84b095b..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/safeCommon.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TENSORRT_SAFE_COMMON_H -#define TENSORRT_SAFE_COMMON_H - -#include "NvInferRuntimeCommon.h" -#include -#include -#include -#include -#include - -#define CHECK(status) \ - do \ - { \ - auto ret = (status); \ - if (ret != 0) \ - { \ - std::cerr << "Cuda failure: " << ret << std::endl; \ - abort(); \ - } \ - } while (0) - -namespace samplesCommon -{ -template -inline std::shared_ptr infer_object(T* obj) -{ - if (!obj) - { - throw std::runtime_error("Failed to create object"); - } - return std::shared_ptr(obj); -} - -inline uint32_t elementSize(nvinfer1::DataType t) -{ - switch (t) - { - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kFLOAT: return 4; - case nvinfer1::DataType::kHALF: return 2; - case nvinfer1::DataType::kINT8: return 1; - case nvinfer1::DataType::kBOOL: return 1; - } - return 0; -} - -template -inline A divUp(A x, B n) -{ - return (x + n - 1) / n; -} - -} // namespace samplesCommon - -#endif // TENSORRT_SAFE_COMMON_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleConfig.h b/src/Detector/tensorrt_yolo/common_deprecated/sampleConfig.h deleted file mode 100644 index 53a78331f..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleConfig.h +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef SampleConfig_H -#define SampleConfig_H - -#include -#include -#include - -#include "NvInfer.h" -#include "NvOnnxConfig.h" -class SampleConfig : public nvonnxparser::IOnnxConfig -{ -public: - enum class InputDataFormat : int - { - kASCII = 0, - kPPM = 1 - }; - -private: - std::string mModelFilename; - std::string mEngineFilename; - std::string mTextFilename; - std::string mFullTextFilename; - std::string mImageFilename; - std::string mReferenceFilename; - std::string mOutputFilename; - std::string mCalibrationFilename; - std::string mTimingCacheFilename; - int64_t mLabel{-1}; - int64_t mMaxBatchSize{32}; - int64_t mCalibBatchSize{0}; - int64_t mMaxNCalibBatch{0}; - int64_t mFirstCalibBatch{0}; - int64_t mUseDLACore{-1}; - nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT}; - bool mTF32{true}; - Verbosity mVerbosity{static_cast(nvinfer1::ILogger::Severity::kWARNING)}; - bool mPrintLayercInfo{false}; - bool mDebugBuilder{false}; - InputDataFormat mInputDataFormat{InputDataFormat::kASCII}; - uint64_t mTopK{0}; - float mFailurePercentage{-1.0f}; - float mTolerance{0.0f}; - float mAbsTolerance{1e-5f}; - -public: - SampleConfig() - { -#ifdef ONNX_DEBUG - if (isDebug()) - { - std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl; - } -#endif - } - -protected: - ~SampleConfig() - { -#ifdef ONNX_DEBUG - if (isDebug()) - { - std::cout << "SampleConfig::dtor(): " << this << std::endl; - } -#endif - } - -public: - void setModelDtype(const nvinfer1::DataType mdt) noexcept - { - mModelDtype = mdt; - } - - nvinfer1::DataType getModelDtype() const noexcept - { - return mModelDtype; - } - - bool getTF32() const noexcept - { - return mTF32; - } - - void setTF32(bool enabled) noexcept - { - mTF32 = enabled; - } - - const char* getModelFileName() const noexcept - { - return mModelFilename.c_str(); - } - - void setModelFileName(const char* onnxFilename) noexcept - { - mModelFilename = std::string(onnxFilename); - } - Verbosity getVerbosityLevel() const noexcept - { - return mVerbosity; - } - void addVerbosity() noexcept - { - ++mVerbosity; - } - void reduceVerbosity() noexcept - { - --mVerbosity; - } - virtual void setVerbosityLevel(Verbosity v) noexcept - { - mVerbosity = v; - } - const char* getEngineFileName() const noexcept - { - return mEngineFilename.c_str(); - } - void setEngineFileName(const char* engineFilename) noexcept - { - mEngineFilename = std::string(engineFilename); - } - const char* getTextFileName() const noexcept - { - return mTextFilename.c_str(); - } - void setTextFileName(const char* textFilename) noexcept - { - mTextFilename = std::string(textFilename); - } - const char* getFullTextFileName() const noexcept - { - return mFullTextFilename.c_str(); - } - void setFullTextFileName(const char* fullTextFilename) noexcept - { - mFullTextFilename = std::string(fullTextFilename); - } - void setLabel(int64_t label) noexcept - { - mLabel = label; - } //!< set the Label - - int64_t getLabel() const noexcept - { - return mLabel; - } //!< get the Label - - bool getPrintLayerInfo() const noexcept - { - return mPrintLayercInfo; - } - - void setPrintLayerInfo(bool b) noexcept - { - mPrintLayercInfo = b; - } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() - - void setMaxBatchSize(int64_t maxBatchSize) noexcept - { - mMaxBatchSize = maxBatchSize; - } //!< set the Max Batch Size - int64_t getMaxBatchSize() const noexcept - { - return mMaxBatchSize; - } //!< get the Max Batch Size - - void setCalibBatchSize(int64_t CalibBatchSize) noexcept - { - mCalibBatchSize = CalibBatchSize; - } //!< set the calibration batch size - int64_t getCalibBatchSize() const noexcept - { - return mCalibBatchSize; - } //!< get calibration batch size - - void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept - { - mMaxNCalibBatch = MaxNCalibBatch; - } //!< set Max Number of Calibration Batches - int64_t getMaxNCalibBatch() const noexcept - { - return mMaxNCalibBatch; - } //!< get the Max Number of Calibration Batches - - void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept - { - mFirstCalibBatch = FirstCalibBatch; - } //!< set the first calibration batch - int64_t getFirstCalibBatch() const noexcept - { - return mFirstCalibBatch; - } //!< get the first calibration batch - - void setUseDLACore(int64_t UseDLACore) noexcept - { - mUseDLACore = UseDLACore; - } //!< set the DLA core to use - int64_t getUseDLACore() const noexcept - { - return mUseDLACore; - } //!< get the DLA core to use - - void setDebugBuilder() noexcept - { - mDebugBuilder = true; - } //!< enable the Debug info, while building the engine. - bool getDebugBuilder() const noexcept - { - return mDebugBuilder; - } //!< get the boolean variable, corresponding to the debug builder - - const char* getImageFileName() const noexcept //!< set Image file name (PPM or ASCII) - { - return mImageFilename.c_str(); - } - void setImageFileName(const char* imageFilename) noexcept //!< get the Image file name - { - mImageFilename = std::string(imageFilename); - } - const char* getReferenceFileName() const noexcept - { - return mReferenceFilename.c_str(); - } - void setReferenceFileName(const char* referenceFilename) noexcept //!< set reference file name - { - mReferenceFilename = std::string(referenceFilename); - } - - void setInputDataFormat(InputDataFormat idt) noexcept - { - mInputDataFormat = idt; - } //!< specifies expected data format of the image file (PPM or ASCII) - InputDataFormat getInputDataFormat() const noexcept - { - return mInputDataFormat; - } //!< returns the expected data format of the image file. - - const char* getOutputFileName() const noexcept //!< specifies the file to save the results - { - return mOutputFilename.c_str(); - } - void setOutputFileName(const char* outputFilename) noexcept //!< get the output file name - { - mOutputFilename = std::string(outputFilename); - } - - const char* getCalibrationFileName() const noexcept - { - return mCalibrationFilename.c_str(); - } //!< specifies the file containing the list of image files for int8 calibration - void setCalibrationFileName(const char* calibrationFilename) noexcept //!< get the int 8 calibration list file name - { - mCalibrationFilename = std::string(calibrationFilename); - } - - uint64_t getTopK() const noexcept - { - return mTopK; - } - void setTopK(uint64_t topK) noexcept - { - mTopK = topK; - } //!< If this options is specified, return the K top probabilities. - - float getFailurePercentage() const noexcept - { - return mFailurePercentage; - } - - void setFailurePercentage(float f) noexcept - { - mFailurePercentage = f; - } - - float getAbsoluteTolerance() const noexcept - { - return mAbsTolerance; - } - - void setAbsoluteTolerance(float a) noexcept - { - mAbsTolerance = a; - } - - float getTolerance() const noexcept - { - return mTolerance; - } - - void setTolerance(float t) noexcept - { - mTolerance = t; - } - - const char* getTimingCacheFilename() const noexcept - { - return mTimingCacheFilename.c_str(); - } - - void setTimingCacheFileName(const char* timingCacheFilename) noexcept - { - mTimingCacheFilename = std::string(timingCacheFilename); - } - - bool isDebug() const noexcept - { -#if ONNX_DEBUG - return (std::getenv("ONNX_DEBUG") ? true : false); -#else - return false; -#endif - } - - void destroy() noexcept - { - delete this; - } - -}; // class SampleConfig - -#endif diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleDevice.h b/src/Detector/tensorrt_yolo/common_deprecated/sampleDevice.h deleted file mode 100644 index 2053ac7c5..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleDevice.h +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_DEVICE_H -#define TRT_SAMPLE_DEVICE_H - -#include -#include -#include -#include -#include - -namespace sample -{ - -inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) -{ - if (ret != cudaSuccess) - { - err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl; - abort(); - } -} - -class TrtCudaEvent; - -namespace -{ - -void cudaSleep(void* sleep) -{ - std::this_thread::sleep_for(std::chrono::duration(*static_cast(sleep))); -} - -} // namespace - -//! -//! \class TrtCudaStream -//! \brief Managed CUDA stream -//! -class TrtCudaStream -{ -public: - TrtCudaStream() - { - cudaCheck(cudaStreamCreate(&mStream)); - } - - TrtCudaStream(const TrtCudaStream&) = delete; - - TrtCudaStream& operator=(const TrtCudaStream&) = delete; - - TrtCudaStream(TrtCudaStream&&) = delete; - - TrtCudaStream& operator=(TrtCudaStream&&) = delete; - - ~TrtCudaStream() - { - cudaCheck(cudaStreamDestroy(mStream)); - } - - cudaStream_t get() const - { - return mStream; - } - - void synchronize() - { - cudaCheck(cudaStreamSynchronize(mStream)); - } - - void wait(TrtCudaEvent& event); - - void sleep(float* ms) - { - cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms)); - } - -private: - cudaStream_t mStream{}; -}; - -//! -//! \class TrtCudaEvent -//! \brief Managed CUDA event -//! -class TrtCudaEvent -{ -public: - explicit TrtCudaEvent(bool blocking = true) - { - const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault; - cudaCheck(cudaEventCreateWithFlags(&mEvent, flags)); - } - - TrtCudaEvent(const TrtCudaEvent&) = delete; - - TrtCudaEvent& operator=(const TrtCudaEvent&) = delete; - - TrtCudaEvent(TrtCudaEvent&&) = delete; - - TrtCudaEvent& operator=(TrtCudaEvent&&) = delete; - - ~TrtCudaEvent() - { - cudaCheck(cudaEventDestroy(mEvent)); - } - - cudaEvent_t get() const - { - return mEvent; - } - - void record(const TrtCudaStream& stream) - { - cudaCheck(cudaEventRecord(mEvent, stream.get())); - } - - void synchronize() - { - cudaCheck(cudaEventSynchronize(mEvent)); - } - - // Returns time elapsed time in milliseconds - float operator-(const TrtCudaEvent& e) const - { - float time{0}; - cudaCheck(cudaEventElapsedTime(&time, e.get(), get())); - return time; - } - -private: - cudaEvent_t mEvent{}; -}; - -inline void TrtCudaStream::wait(TrtCudaEvent& event) -{ - cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0)); -} - -//! -//! \class TrtCudaGraph -//! \brief Managed CUDA graph -//! -class TrtCudaGraph -{ -public: - explicit TrtCudaGraph() = default; - - TrtCudaGraph(const TrtCudaGraph&) = delete; - - TrtCudaGraph& operator=(const TrtCudaGraph&) = delete; - - TrtCudaGraph(TrtCudaGraph&&) = delete; - - TrtCudaGraph& operator=(TrtCudaGraph&&) = delete; - - ~TrtCudaGraph() - { - if (mGraphExec) - { - cudaGraphExecDestroy(mGraphExec); - } - } - - void beginCapture(TrtCudaStream& stream) - { - cudaCheck(cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal)); - } - - bool launch(TrtCudaStream& stream) - { - return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess; - } - - void endCapture(TrtCudaStream& stream) - { - cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph)); - cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0)); - cudaCheck(cudaGraphDestroy(mGraph)); - } - - void endCaptureOnError(TrtCudaStream& stream) - { - // There are two possibilities why stream capture would fail: - // (1) stream is in cudaErrorStreamCaptureInvalidated state. - // (2) TRT reports a failure. - // In case (1), the returning mGraph should be nullptr. - // In case (2), the returning mGraph is not nullptr, but it should not be used. - const auto ret = cudaStreamEndCapture(stream.get(), &mGraph); - if (ret == cudaErrorStreamCaptureInvalidated) - { - assert(mGraph == nullptr); - } - else - { - assert(ret == cudaSuccess); - assert(mGraph != nullptr); - cudaCheck(cudaGraphDestroy(mGraph)); - mGraph = nullptr; - } - // Clean up any CUDA error. - cudaGetLastError(); - sample::gLogWarning << "The CUDA graph capture on the stream has failed." << std::endl; - } - -private: - cudaGraph_t mGraph{}; - cudaGraphExec_t mGraphExec{}; -}; - -//! -//! \class TrtCudaBuffer -//! \brief Managed buffer for host and device -//! -template -class TrtCudaBuffer -{ -public: - TrtCudaBuffer() = default; - - TrtCudaBuffer(const TrtCudaBuffer&) = delete; - - TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete; - - TrtCudaBuffer(TrtCudaBuffer&& rhs) - { - reset(rhs.mPtr); - rhs.mPtr = nullptr; - } - - TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) - { - if (this != &rhs) - { - reset(rhs.mPtr); - rhs.mPtr = nullptr; - } - return *this; - } - - ~TrtCudaBuffer() - { - reset(); - } - - TrtCudaBuffer(size_t size) - { - A()(&mPtr, size); - } - - void allocate(size_t size) - { - reset(); - A()(&mPtr, size); - } - - void reset(void* ptr = nullptr) - { - if (mPtr) - { - D()(mPtr); - } - mPtr = ptr; - } - - void* get() const - { - return mPtr; - } - -private: - void* mPtr{nullptr}; -}; - -struct DeviceAllocator -{ - void operator()(void** ptr, size_t size) - { - cudaCheck(cudaMalloc(ptr, size)); - } -}; - -struct DeviceDeallocator -{ - void operator()(void* ptr) - { - cudaCheck(cudaFree(ptr)); - } -}; - -struct ManagedAllocator -{ - void operator()(void** ptr, size_t size) - { - cudaCheck(cudaMallocManaged(ptr, size)); - } -}; - -struct HostAllocator -{ - void operator()(void** ptr, size_t size) - { - cudaCheck(cudaMallocHost(ptr, size)); - } -}; - -struct HostDeallocator -{ - void operator()(void* ptr) - { - cudaCheck(cudaFreeHost(ptr)); - } -}; - -using TrtDeviceBuffer = TrtCudaBuffer; -using TrtManagedBuffer = TrtCudaBuffer; - -using TrtHostBuffer = TrtCudaBuffer; - -//! -//! \class MirroredBuffer -//! \brief Coupled host and device buffers -//! -class IMirroredBuffer -{ -public: - //! - //! Allocate memory for the mirrored buffer give the size - //! of the allocation. - //! - virtual void allocate(size_t size) = 0; - - //! - //! Get the pointer to the device side buffer. - //! - //! \return pointer to device memory or nullptr if uninitialized. - //! - virtual void* getDeviceBuffer() const = 0; - - //! - //! Get the pointer to the host side buffer. - //! - //! \return pointer to host memory or nullptr if uninitialized. - //! - virtual void* getHostBuffer() const = 0; - - //! - //! Copy the memory from host to device. - //! - virtual void hostToDevice(TrtCudaStream& stream) = 0; - - //! - //! Copy the memory from device to host. - //! - virtual void deviceToHost(TrtCudaStream& stream) = 0; - - //! - //! Interface to get the size of the memory - //! - //! \return the size of memory allocated. - //! - virtual size_t getSize() const = 0; - - //! - //! Virtual destructor declaraion - //! - virtual ~IMirroredBuffer() = default; - -}; // class IMirroredBuffer - -//! -//! Class to have a seperate memory buffer for discrete device and host allocations. -//! -class DiscreteMirroredBuffer : public IMirroredBuffer -{ -public: - void allocate(size_t size) - { - mSize = size; - mHostBuffer.allocate(size); - mDeviceBuffer.allocate(size); - } - - void* getDeviceBuffer() const - { - return mDeviceBuffer.get(); - } - - void* getHostBuffer() const - { - return mHostBuffer.get(); - } - - void hostToDevice(TrtCudaStream& stream) - { - cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize, cudaMemcpyHostToDevice, stream.get())); - } - - void deviceToHost(TrtCudaStream& stream) - { - cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize, cudaMemcpyDeviceToHost, stream.get())); - } - - size_t getSize() const - { - return mSize; - } - -private: - size_t mSize{0}; - TrtHostBuffer mHostBuffer; - TrtDeviceBuffer mDeviceBuffer; -}; // class DiscreteMirroredBuffer - -//! -//! Class to have a unified memory buffer for embedded devices. -//! -class UnifiedMirroredBuffer : public IMirroredBuffer -{ -public: - void allocate(size_t size) - { - mSize = size; - mBuffer.allocate(size); - } - - void* getDeviceBuffer() const - { - return mBuffer.get(); - } - - void* getHostBuffer() const - { - return mBuffer.get(); - } - - void hostToDevice(TrtCudaStream& /*stream*/) - { - // Does nothing since we are using unified memory. - } - - void deviceToHost(TrtCudaStream& /*stream*/) - { - // Does nothing since we are using unified memory. - } - - size_t getSize() const - { - return mSize; - } - -private: - size_t mSize{0}; - TrtManagedBuffer mBuffer; -}; // class UnifiedMirroredBuffer - -inline void setCudaDevice(int device, std::ostream& os) -{ - cudaCheck(cudaSetDevice(device)); - - cudaDeviceProp properties; - cudaCheck(cudaGetDeviceProperties(&properties, device)); - -// clang-format off - os << "=== Device Information ===" << std::endl; - os << "Selected Device: " << properties.name << std::endl; - os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl; - os << "SMs: " << properties.multiProcessorCount << std::endl; - os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; - os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl; - os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; - os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" - << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl; - os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; - // clang-format on -} - -} // namespace sample - -#endif // TRT_SAMPLE_DEVICE_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleEngines.cpp b/src/Detector/tensorrt_yolo/common_deprecated/sampleEngines.cpp deleted file mode 100644 index 8bb8a8fe4..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleEngines.cpp +++ /dev/null @@ -1,1629 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "NvInfer.h" -#include "NvOnnxParser.h" - -#include "common.h" -#include "ErrorRecorder.h" -#include "half.h" -#include "logger.h" -#include "sampleEngines.h" -#include "sampleOptions.h" -#include "sampleUtils.h" - -#if !defined(_WIN32) -#include -#endif - -namespace sample -{ - -namespace -{ - -std::map readScalesFromCalibrationCache(const std::string& calibrationFile) -{ - std::map tensorScales; - std::ifstream cache{calibrationFile}; - if (!cache.is_open()) - { - sample::gLogError << "[TRT] Can not open provided calibration cache file" << std::endl; - return tensorScales; - } - std::string line; - while (std::getline(cache, line)) - { - auto colonPos = line.find_last_of(':'); - if (colonPos != std::string::npos) - { - // Scales should be stored in calibration cache as 32-bit floating numbers encoded as 32-bit integers - int32_t scalesAsInt = std::stoi(line.substr(colonPos + 2, 8), nullptr, 16); - const auto tensorName = line.substr(0, colonPos); - tensorScales[tensorName] = *reinterpret_cast(&scalesAsInt); - } - } - cache.close(); - return tensorScales; -} -} // namespace - -void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, const std::vector& inputFormats, - const std::vector& outputFormats, const std::string& calibrationFile) -{ - const auto tensorScales = readScalesFromCalibrationCache(calibrationFile); - const bool broadcastInputFormats = broadcastIOFormats(inputFormats, network.getNbInputs()); - for (int32_t i = 0, n = network.getNbInputs(); i < n; ++i) - { - int32_t formatIdx = broadcastInputFormats ? 0 : i; - if (!inputFormats.empty() && inputFormats[formatIdx].first == nvinfer1::DataType::kINT8) - { - auto* input = network.getInput(i); - const auto calibScale = tensorScales.at(input->getName()); - input->setDynamicRange(-127 * calibScale, 127 * calibScale); - } - } - const bool broadcastOutputFormats = broadcastIOFormats(outputFormats, network.getNbInputs()); - for (int32_t i = 0, n = network.getNbOutputs(); i < n; ++i) - { - int32_t formatIdx = broadcastOutputFormats ? 0 : i; - if (!outputFormats.empty() && outputFormats[formatIdx].first == nvinfer1::DataType::kINT8) - { - auto* output = network.getOutput(i); - const auto calibScale = tensorScales.at(output->getName()); - output->setDynamicRange(-127 * calibScale, 127 * calibScale); - } - } -} - -#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err) \ - { \ - if ((condition) == false) \ - { \ - (err) << (msg) << std::endl; \ - return retval; \ - } \ - } - -Parser modelToNetwork(const ModelOptions& model, nvinfer1::INetworkDefinition& network, std::ostream& err) -{ - sample::gLogInfo << "Start parsing network model" << std::endl; - Parser parser; - //const std::string& modelName = model.baseModel.model; - switch (model.baseModel.format) - { - case ModelFormat::kONNX: - { - using namespace nvonnxparser; - parser.onnxParser.reset(createParser(network, sample::gLogger.getTRTLogger())); - if (!parser.onnxParser->parseFromFile( - model.baseModel.model.c_str(), static_cast(sample::gLogger.getReportableSeverity()))) - { - err << "Failed to parse onnx file" << std::endl; - parser.onnxParser.reset(); - } - break; - } - case ModelFormat::kANY: - break; - default: - break; - } - - sample::gLogInfo << "Finish parsing network model" << std::endl; - return parser; -} - -namespace -{ - -class RndInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 -{ -public: - RndInt8Calibrator(int batches, std::vector& elemCount, const std::string& cacheFile, - const nvinfer1::INetworkDefinition& network, std::ostream& err); - - ~RndInt8Calibrator() - { - for (auto& elem : mInputDeviceBuffers) - { - cudaCheck(cudaFree(elem.second), mErr); - } - } - - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; - - int getBatchSize() const noexcept override - { - return 1; - } - - const void* readCalibrationCache(size_t& length) noexcept override; - - virtual void writeCalibrationCache(const void*, size_t) noexcept override {} - -private: - int mBatches{}; - int mCurrentBatch{}; - std::string mCacheFile; - std::map mInputDeviceBuffers; - std::vector mCalibrationCache; - std::ostream& mErr; -}; - -RndInt8Calibrator::RndInt8Calibrator(int batches, std::vector& elemCount, const std::string& cacheFile, - const nvinfer1::INetworkDefinition& network, std::ostream& err) - : mBatches(batches) - , mCurrentBatch(0) - , mCacheFile(cacheFile) - , mErr(err) -{ - std::ifstream tryCache(cacheFile, std::ios::binary); - if (tryCache.good()) - { - return; - } - - std::default_random_engine generator; - std::uniform_real_distribution distribution(-1.0F, 1.0F); - auto gen = [&generator, &distribution]() { return distribution(generator); }; - - for (int i = 0; i < network.getNbInputs(); i++) - { - auto* input = network.getInput(i); - std::vector rnd_data(elemCount[i]); - std::generate_n(rnd_data.begin(), elemCount[i], gen); - - void* data; - cudaCheck(cudaMalloc(&data, elemCount[i] * sizeof(float)), mErr); - cudaCheck(cudaMemcpy(data, rnd_data.data(), elemCount[i] * sizeof(float), cudaMemcpyHostToDevice), mErr); - - mInputDeviceBuffers.insert(std::make_pair(input->getName(), data)); - } -} - -bool RndInt8Calibrator::getBatch(void* bindings[], const char* names[], int nbBindings) noexcept -{ - if (mCurrentBatch >= mBatches) - { - return false; - } - - for (int i = 0; i < nbBindings; ++i) - { - bindings[i] = mInputDeviceBuffers[names[i]]; - } - - ++mCurrentBatch; - - return true; -} - -const void* RndInt8Calibrator::readCalibrationCache(size_t& length) noexcept -{ - mCalibrationCache.clear(); - std::ifstream input(mCacheFile, std::ios::binary); - input >> std::noskipws; - if (input.good()) - { - std::copy( - std::istream_iterator(input), std::istream_iterator(), std::back_inserter(mCalibrationCache)); - } - - length = mCalibrationCache.size(); - return !mCalibrationCache.empty() ? mCalibrationCache.data() : nullptr; -} - -bool setTensorDynamicRange(const nvinfer1::INetworkDefinition& network, float inRange = 2.0F, float outRange = 4.0F) -{ - // Ensure that all layer inputs have a dynamic range. - for (int l = 0; l < network.getNbLayers(); l++) - { - auto* layer = network.getLayer(l); - for (int i = 0; i < layer->getNbInputs(); i++) - { - nvinfer1::ITensor* input{layer->getInput(i)}; - // Optional inputs are nullptr here and are from RNN layers. - if (input && !input->dynamicRangeIsSet()) - { - // Concat should propagate dynamic range from outputs to inputs to avoid - // Re-quantization during the concatenation - auto dynRange = (layer->getType() == nvinfer1::LayerType::kCONCATENATION) ? outRange : inRange; - if (!input->setDynamicRange(-dynRange, dynRange)) - { - return false; - } - } - } - for (int o = 0; o < layer->getNbOutputs(); o++) - { - nvinfer1::ITensor* output{layer->getOutput(o)}; - // Optional outputs are nullptr here and are from RNN layers. - if (output && !output->dynamicRangeIsSet()) - { - // Pooling must have the same input and output dynamic range. - if (layer->getType() == nvinfer1::LayerType::kPOOLING) - { - if (!output->setDynamicRange(-inRange, inRange)) - { - return false; - } - } - else - { - if (!output->setDynamicRange(-outRange, outRange)) - { - return false; - } - } - } - } - } - return true; -} - -// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0. -template -void sparsify(const T* values, int64_t count, int32_t k, int32_t rs, std::vector& sparseWeights) -{ - const auto c = count / (k * rs); - sparseWeights.resize(count * sizeof(T)); - auto* sparseValues = reinterpret_cast(sparseWeights.data()); - - constexpr int32_t window = 4; - constexpr int32_t nonzeros = 2; - - const int32_t crs = c * rs; - const auto getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { return ki * crs + ci * rs + rsi; }; - - for (int64_t ki = 0; ki < k; ++ki) - { - for (int64_t rsi = 0; rsi < rs; ++rsi) - { - int32_t w = 0; - int32_t nz = 0; - for (int64_t ci = 0; ci < c; ++ci) - { - const auto index = getIndex(ki, ci, rsi); - if (nz < nonzeros) - { - sparseValues[index] = values[index]; - ++nz; - } - else - { - sparseValues[index] = 0; - } - if (++w == window) - { - w = 0; - nz = 0; - } - } - } - } -} - -void sparsify(const nvinfer1::Weights& weights, int32_t k, int32_t rs, std::vector& sparseWeights) -{ - switch (weights.type) - { - case nvinfer1::DataType::kFLOAT: - sparsify(static_cast(weights.values), weights.count, k, rs, sparseWeights); - break; - case nvinfer1::DataType::kHALF: - sparsify(static_cast(weights.values), weights.count, k, rs, sparseWeights); - break; - case nvinfer1::DataType::kINT8: - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kBOOL: break; - } -} - -template -void setSparseWeights(L& l, int32_t k, int32_t rs, std::vector& sparseWeights) -{ - auto weights = l.getKernelWeights(); - sparsify(weights, k, rs, sparseWeights); - weights.values = sparseWeights.data(); - l.setKernelWeights(weights); -} - -template -void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n) -{ - ASSERT(dst != src); - T* tdst = reinterpret_cast(dst); - T const* tsrc = reinterpret_cast(src); - for (int32_t mi = 0; mi < m; ++mi) - { - for (int32_t ni = 0; ni < n; ++ni) - { - int32_t const isrc = mi * n + ni; - int32_t const idst = ni * m + mi; - tdst[idst] = tsrc[isrc]; - } - } -} - -// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle layers. -// Forward analysis on the API graph to determine which weights to sparsify. -void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights) -{ - using TensorToLayer = std::unordered_map; - using LayerToTensor = std::unordered_map; - - // 1. Collect layers and tensors information from the network. - TensorToLayer matmulI2L; - TensorToLayer constO2L; - TensorToLayer shuffleI2L; - LayerToTensor shuffleL2O; - auto collectMappingInfo = [&](int32_t const idx) { - nvinfer1::ILayer* l = network.getLayer(idx); - switch (l->getType()) - { - case nvinfer1::LayerType::kMATRIX_MULTIPLY: - { - // assume weights on the second input. - matmulI2L.insert({l->getInput(1), l}); - break; - } - case nvinfer1::LayerType::kCONSTANT: - { - nvinfer1::DataType const dtype = static_cast(l)->getWeights().type; - if (dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF) - { - // Sparsify float only. - constO2L.insert({l->getOutput(0), l}); - } - break; - } - case nvinfer1::LayerType::kSHUFFLE: - { - shuffleI2L.insert({l->getInput(0), l}); - shuffleL2O.insert({l, l->getOutput(0)}); - break; - } - default: break; - } - }; - int32_t const nbLayers = network.getNbLayers(); - for (int32_t i = 0; i < nbLayers; ++i) - { - collectMappingInfo(i); - } - if (matmulI2L.size() == 0 || constO2L.size() == 0) - { - // No MatrixMultiply or Constant layer found, no weights to sparsify. - return; - } - - // Helper for analysis - auto isTranspose = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); }; - auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; }; - auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool { - for (int32_t i = 0; i < dims.nbDims; ++i) - { - if (dims.d[i] != i || dims.d[i] != -1) - { - return false; - } - } - return true; - }; - auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) - { - while (shuffleI2L.find(t) != shuffleI2L.end()) - { - nvinfer1::IShuffleLayer* s = static_cast(shuffleI2L.at(t)); - if (!is2D(s->getInput(0)->getDimensions()) || !is2D(s->getReshapeDimensions()) - || !isIdenticalReshape(s->getReshapeDimensions())) - { - break; - } - - if (isTranspose(s->getFirstTranspose())) - needTranspose = !needTranspose; - if (isTranspose(s->getSecondTranspose())) - needTranspose = !needTranspose; - - t = shuffleL2O.at(s); - } - return t; - }; - - // 2. Forward analysis to collect the Constant layers connected to MatMul via Transpose - std::unordered_map constantLayerToSparse; - for (auto& o2l : constO2L) - { - // If need to transpose the weights of the Constant layer. - // Need to transpose by default due to semantic difference. - bool needTranspose{true}; - nvinfer1::ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose); - if (matmulI2L.find(t) == matmulI2L.end()) - { - continue; - } - - // check MatMul params... - nvinfer1::IMatrixMultiplyLayer* mm = static_cast(matmulI2L.at(t)); - bool const twoInputs = mm->getNbInputs() == 2; - bool const all2D = is2D(mm->getInput(0)->getDimensions()) && is2D(mm->getInput(1)->getDimensions()); - bool const isSimple - = mm->getOperation(0) == nvinfer1::MatrixOperation::kNONE && mm->getOperation(1) != nvinfer1::MatrixOperation::kVECTOR; - if (!(twoInputs && all2D && isSimple)) - continue; - - if (mm->getOperation(1) == nvinfer1::MatrixOperation::kTRANSPOSE) - needTranspose = !needTranspose; - - constantLayerToSparse.insert({static_cast(o2l.second), needTranspose}); - } - - // 3. Finally, sparsify the weights - auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) - { - nvinfer1::Dims dims = layer->getOutput(0)->getDimensions(); - ASSERT(dims.nbDims == 2); - int32_t const idxN = needTranspose ? 1 : 0; - int32_t const n = dims.d[idxN]; - int32_t const k = dims.d[1 - idxN]; - sparseWeights.emplace_back(); - std::vector& spw = sparseWeights.back(); - nvinfer1::Weights w = layer->getWeights(); - nvinfer1::DataType const dtype = w.type; - ASSERT(dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF); // non-float weights should have been ignored. - - if (needTranspose) - { - if (dtype == nvinfer1::DataType::kFLOAT) - { - spw.resize(w.count * sizeof(float)); - transpose2DWeights(spw.data(), w.values, k, n); - } - else if (dtype == nvinfer1::DataType::kHALF) - { - spw.resize(w.count * sizeof(half_float::half)); - transpose2DWeights(spw.data(), w.values, k, n); - } - - w.values = spw.data(); - std::vector tmpW; - sparsify(w, n, 1, tmpW); - - if (dtype == nvinfer1::DataType::kFLOAT) - transpose2DWeights(spw.data(), tmpW.data(), n, k); - else if (dtype == nvinfer1::DataType::kHALF) - transpose2DWeights(spw.data(), tmpW.data(), n, k); - } - else - { - sparsify(w, n, 1, spw); - } - - w.values = spw.data(); - layer->setWeights(w); - }; - for (auto& l : constantLayerToSparse) - { - sparsifyConstantWeights(l.first, l.second); - } -} - -void sparsify(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights) -{ - for (int32_t l = 0; l < network.getNbLayers(); ++l) - { - auto* layer = network.getLayer(l); - const auto t = layer->getType(); - if (t == nvinfer1::LayerType::kCONVOLUTION) - { - auto& conv = *static_cast(layer); - const auto& dims = conv.getKernelSizeNd(); - if (dims.nbDims > 2) - { - continue; - } - const auto k = conv.getNbOutputMaps(); - const auto rs = dims.d[0] * dims.d[1]; - sparseWeights.emplace_back(); - setSparseWeights(conv, k, rs, sparseWeights.back()); - } - else if (t == nvinfer1::LayerType::kFULLY_CONNECTED) - { - auto& fc = *static_cast(layer); - const auto k = fc.getNbOutputChannels(); - sparseWeights.emplace_back(); - setSparseWeights(fc, k, 1, sparseWeights.back()); - } - } - - sparsifyMatMulKernelWeights(network, sparseWeights); -} - -void setLayerPrecisions(nvinfer1::INetworkDefinition& network, LayerPrecisions const& layerPrecisions) -{ - bool const hasGlobalPrecision{layerPrecisions.find("*") != layerPrecisions.end()}; - auto const globalPrecision = hasGlobalPrecision ? layerPrecisions.at("*") : nvinfer1::DataType::kFLOAT; - bool hasLayerPrecisionSkipped{false}; - for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) - { - auto* layer = network.getLayer(layerIdx); - auto const layerName = layer->getName(); - if (layerPrecisions.find(layer->getName()) != layerPrecisions.end()) - { - layer->setPrecision(layerPrecisions.at(layer->getName())); - } - else if (hasGlobalPrecision) - { - // We should not set the layer precision if its default precision is INT32 or Bool. - if (layer->getPrecision() == nvinfer1::DataType::kINT32 - || layer->getPrecision() == nvinfer1::DataType::kBOOL) - { - hasLayerPrecisionSkipped = true; - sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because the " - << " default layer precision is INT32 or Bool." << std::endl; - continue; - } - // We should not set the constant layer precision if its weights are in INT32. - if (layer->getType() == nvinfer1::LayerType::kCONSTANT - && static_cast(layer)->getWeights().type == nvinfer1::DataType::kINT32) - { - hasLayerPrecisionSkipped = true; - sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this " - << "constant layer has INT32 weights." << std::endl; - continue; - } - // We should not set the layer precision if the layer operates on a shape tensor. - if (layer->getNbInputs() >= 1 && layer->getInput(0)->isShapeTensor()) - { - hasLayerPrecisionSkipped = true; - sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this layer " - << "operates on a shape tensor." << std::endl; - continue; - } - if ((layer->getType() == nvinfer1::LayerType::kIDENTITY - || layer->getType() == nvinfer1::LayerType::kSHUFFLE) - && layer->getNbInputs() >= 1 && layer->getInput(0)->getType() == nvinfer1::DataType::kINT32 - && layer->getNbOutputs() >= 1 && layer->getOutput(0)->getType() == nvinfer1::DataType::kINT32) - { - hasLayerPrecisionSkipped = true; - sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this " - << "layer has INT32 input and output." << std::endl; - continue; - } - // All heuristics passed. Set the layer precision. - layer->setPrecision(globalPrecision); - } - } - - if (hasLayerPrecisionSkipped) - { - sample::gLogInfo << "Skipped setting precisions for some layers. Check verbose logs for more details." - << std::endl; - } -} - -void setLayerOutputTypes(nvinfer1::INetworkDefinition& network, LayerOutputTypes const& layerOutputTypes) -{ - bool const hasGlobalOutputType{layerOutputTypes.find("*") != layerOutputTypes.end()}; - auto const globalOutputType = hasGlobalOutputType ? layerOutputTypes.at("*").at(0) : nvinfer1::DataType::kFLOAT; - bool hasLayerOutputTypeSkipped{false}; - for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) - { - auto* layer = network.getLayer(layerIdx); - auto const layerName = layer->getName(); - auto const nbOutputs = layer->getNbOutputs(); - if (layerOutputTypes.find(layer->getName()) != layerOutputTypes.end()) - { - auto const& outputTypes = layerOutputTypes.at(layer->getName()); - bool const isBroadcast = (outputTypes.size() == 1); - if (!isBroadcast && static_cast(outputTypes.size()) != nbOutputs) - { - sample::gLogError << "Layer " << layerName << " has " << nbOutputs << " outputs but " - << outputTypes.size() << " output types are given in --layerOutputTypes flag." - << std::endl; - throw std::invalid_argument("Invalid --layerOutputTypes flag."); - } - for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) - { - layer->setOutputType(outputIdx, outputTypes.at(isBroadcast ? 0 : outputIdx)); - } - } - else if (hasGlobalOutputType) - { - // We should not set the layer output types if its default precision is INT32 or Bool. - if (layer->getPrecision() == nvinfer1::DataType::kINT32 - || layer->getPrecision() == nvinfer1::DataType::kBOOL) - { - hasLayerOutputTypeSkipped = true; - sample::gLogVerbose << "Skipped setting output types for layer " << layerName << " because the " - << " default layer precision is INT32 or Bool." << std::endl; - continue; - } - // We should not set the constant layer output types if its weights are in INT32. - if (layer->getType() == nvinfer1::LayerType::kCONSTANT - && static_cast(layer)->getWeights().type == nvinfer1::DataType::kINT32) - { - hasLayerOutputTypeSkipped = true; - sample::gLogVerbose << "Skipped setting output types for layer " << layerName << " because this " - << "constant layer has INT32 weights." << std::endl; - continue; - } - for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) - { - // We should not set the output type if the output is a shape tensor. - if (layer->getOutput(0)->isShapeTensor()) - { - hasLayerOutputTypeSkipped = true; - sample::gLogVerbose << "Skipped setting output type for output " << outputIdx << " of layer " - << layerName << " because it is a shape tensor." << std::endl; - continue; - } - layer->setOutputType(outputIdx, globalOutputType); - } - } - } - - if (hasLayerOutputTypeSkipped) - { - sample::gLogInfo << "Skipped setting output types for some layers. Check verbose logs for more details." - << std::endl; - } -} - -void setMemoryPoolLimits(nvinfer1::IBuilderConfig& config, BuildOptions const& build) -{ - auto const roundToBytes = [](double const sizeInMB) { return static_cast(sizeInMB * (1 << 20)); }; - if (build.workspace >= 0) - config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, roundToBytes(build.workspace)); - if (build.dlaSRAM >= 0) - config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, roundToBytes(build.dlaSRAM)); - if (build.dlaLocalDRAM >= 0) - config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, roundToBytes(build.dlaLocalDRAM)); - if (build.dlaGlobalDRAM >= 0) - config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, roundToBytes(build.dlaGlobalDRAM)); -} - -} // namespace - -bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - nvinfer1::INetworkDefinition& network, nvinfer1::IBuilderConfig& config, std::ostream& err, - std::vector>& sparseWeights) -{ - nvinfer1::IOptimizationProfile* profile{nullptr}; - if (build.maxBatch) - builder.setMaxBatchSize(build.maxBatch); - else - profile = builder.createOptimizationProfile(); - - bool hasDynamicShapes{false}; - - bool broadcastInputFormats = broadcastIOFormats(build.inputFormats, network.getNbInputs()); - - if (profile) - { - // Check if the provided input tensor names match the input tensors of the engine. - // Throw an error if the provided input tensor names cannot be found because it implies a potential typo. - for (const auto& shape : build.shapes) - { - bool tensorNameFound{false}; - for (int32_t i = 0; i < network.getNbInputs(); ++i) - { - if (network.getInput(i)->getName() == shape.first) - { - tensorNameFound = true; - break; - } - } - if (!tensorNameFound) - { - sample::gLogError << "Cannot find input tensor with name \"" << shape.first << "\" in the network " - << "inputs! Please make sure the input tensor names are correct." << std::endl; - return false; - } - } - } - - for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) - { - // Set formats and data types of inputs - auto* input = network.getInput(i); - if (!build.inputFormats.empty()) - { - int inputFormatIndex = broadcastInputFormats ? 0 : i; - input->setType(build.inputFormats[inputFormatIndex].first); - input->setAllowedFormats(build.inputFormats[inputFormatIndex].second); - } - else - { - switch (input->getType()) - { - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kBOOL: - case nvinfer1::DataType::kHALF: - // Leave these as is. - break; - case nvinfer1::DataType::kFLOAT: - case nvinfer1::DataType::kINT8: - // User did not specify a floating-point format. Default to kFLOAT. - input->setType(nvinfer1::DataType::kFLOAT); - break; - } - input->setAllowedFormats(1U << static_cast(nvinfer1::TensorFormat::kLINEAR)); - } - - if (profile) - { - auto const dims = input->getDimensions(); - auto const isScalar = dims.nbDims == 0; - auto const isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }) - || input->isShapeTensor(); - if (isDynamicInput) - { - hasDynamicShapes = true; - auto shape = build.shapes.find(input->getName()); - ShapeRange shapes{}; - - // If no shape is provided, set dynamic dimensions to 1. - if (shape == build.shapes.end()) - { - constexpr int DEFAULT_DIMENSION = 1; - std::vector staticDims; - if (input->isShapeTensor()) - { - if (isScalar) - { - staticDims.push_back(1); - } - else - { - staticDims.resize(dims.d[0]); - std::fill(staticDims.begin(), staticDims.end(), DEFAULT_DIMENSION); - } - } - else - { - staticDims.resize(dims.nbDims); - std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), - [&](int dimension) { return dimension > 0 ? dimension : DEFAULT_DIMENSION; }); - } - sample::gLogWarning << "Dynamic dimensions required for input: " << input->getName() - << ", but no shapes were provided. Automatically overriding shape to: " - << staticDims << std::endl; - std::fill(shapes.begin(), shapes.end(), staticDims); - } - else - { - shapes = shape->second; - } - - std::vector profileDims{}; - if (input->isShapeTensor()) - { - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kMIN)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues(input->getName(), nvinfer1::OptProfileSelector::kMIN, - profileDims.data(), static_cast(profileDims.size())), - "Error in set shape values MIN", false, err); - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kOPT)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues(input->getName(), nvinfer1::OptProfileSelector::kOPT, - profileDims.data(), static_cast(profileDims.size())), - "Error in set shape values OPT", false, err); - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kMAX)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues(input->getName(), nvinfer1::OptProfileSelector::kMAX, - profileDims.data(), static_cast(profileDims.size())), - "Error in set shape values MAX", false, err); - } - else - { - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kMIN)]; - SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, toDims(profileDims)), - "Error in set dimensions to profile MIN", false, err); - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kOPT)]; - SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, toDims(profileDims)), - "Error in set dimensions to profile OPT", false, err); - profileDims = shapes[static_cast(nvinfer1::OptProfileSelector::kMAX)]; - SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, toDims(profileDims)), - "Error in set dimensions to profile MAX", false, err); - } - } - } - } - - if (!hasDynamicShapes && !build.shapes.empty()) - { - sample::gLogError << "Static model does not take explicit shapes since the shape of inference tensors will be " - "determined by the model itself" - << std::endl; - return false; - } - - if (profile && hasDynamicShapes) - { - SMP_RETVAL_IF_FALSE(profile->isValid(), "Required optimization profile is invalid", false, err); - SMP_RETVAL_IF_FALSE(config.addOptimizationProfile(profile) != -1, "Error in add optimization profile", false, err); - } - - bool broadcastOutputFormats = broadcastIOFormats(build.outputFormats, network.getNbOutputs(), false); - - for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++) - { - // Set formats and data types of outputs - auto* output = network.getOutput(i); - if (!build.outputFormats.empty()) - { - int outputFormatIndex = broadcastOutputFormats ? 0 : i; - output->setType(build.outputFormats[outputFormatIndex].first); - output->setAllowedFormats(build.outputFormats[outputFormatIndex].second); - } - else - { - output->setAllowedFormats(1U << static_cast(nvinfer1::TensorFormat::kLINEAR)); - } - } - - setMemoryPoolLimits(config, build); - - if (build.timingCacheMode == TimingCacheMode::kDISABLE) - config.setFlag(nvinfer1::BuilderFlag::kDISABLE_TIMING_CACHE); - - if (!build.tf32) - config.clearFlag(nvinfer1::BuilderFlag::kTF32); - - if (build.refittable) - config.setFlag(nvinfer1::BuilderFlag::kREFIT); - - if (build.sparsity != SparsityFlag::kDISABLE) - { - config.setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); - if (build.sparsity == SparsityFlag::kFORCE) - sparsify(network, sparseWeights); - } - - config.setProfilingVerbosity(build.profilingVerbosity); - config.setMinTimingIterations(build.minTiming); - config.setAvgTimingIterations(build.avgTiming); - - if (build.fp16) - config.setFlag(nvinfer1::BuilderFlag::kFP16); - - if (build.int8) - config.setFlag(nvinfer1::BuilderFlag::kINT8); - - if (build.int8 && !build.fp16) - { - sample::gLogInfo - << "FP32 and INT8 precisions have been specified - more performance might be enabled by additionally " - "specifying --fp16 or --best" - << std::endl; - } - - auto isInt8 = [](const IOFormat& format) { return format.first == nvinfer1::DataType::kINT8; }; - auto int8IO = std::count_if(build.inputFormats.begin(), build.inputFormats.end(), isInt8) - + std::count_if(build.outputFormats.begin(), build.outputFormats.end(), isInt8); - - auto hasQDQLayers = [](nvinfer1::INetworkDefinition& network) { - // Determine if our network has QDQ layers. - const auto nbLayers = network.getNbLayers(); - for (int32_t i = 0; i < nbLayers; i++) - { - const auto& layer = network.getLayer(i); - if (layer->getType() == nvinfer1::LayerType::kQUANTIZE || layer->getType() == nvinfer1::LayerType::kDEQUANTIZE) - return true; - } - return false; - }; - - if (!hasQDQLayers(network) && (build.int8 || int8IO) && build.calibration.empty()) - { - // Explicitly set int8 scales if no calibrator is provided and if I/O tensors use int8, - // because auto calibration does not support this case. - SMP_RETVAL_IF_FALSE(setTensorDynamicRange(network), "Error in set tensor dynamic range.", false, err); - } - else if (build.int8) - { - if (!hasQDQLayers(network) && int8IO) - { - try - { - // Set dynamic ranges of int8 inputs / outputs to match scales loaded from calibration cache - // TODO http://nvbugs/3262234 Change the network validation so that this workaround can be removed - setTensorScalesFromCalibration(network, build.inputFormats, build.outputFormats, build.calibration); - } - catch (std::exception&) - { - sample::gLogError - << "Int8IO was specified but impossible to read tensor scales from provided calibration cache file" - << std::endl; - return false; - } - } - nvinfer1::IOptimizationProfile* profileCalib{nullptr}; - if (!build.shapesCalib.empty()) - { - profileCalib = builder.createOptimizationProfile(); - for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) - { - auto* input = network.getInput(i); - nvinfer1::Dims profileDims{}; - auto shape = build.shapesCalib.find(input->getName()); - ShapeRange shapesCalib{}; - shapesCalib = shape->second; - - profileDims = toDims(shapesCalib[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - // Here we check only kMIN as all profileDims are the same. - SMP_RETVAL_IF_FALSE( - profileCalib->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, profileDims), - "Error in set dimensions to calibration profile OPT", false, err); - profileCalib->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, profileDims); - profileCalib->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, profileDims); - } - SMP_RETVAL_IF_FALSE(profileCalib->isValid(), "Calibration profile is invalid", false, err); - SMP_RETVAL_IF_FALSE(config.setCalibrationProfile(profileCalib), "Error in set calibration profile", false, err); - } - - std::vector elemCount{}; - for (int i = 0; i < network.getNbInputs(); i++) - { - auto* input = network.getInput(i); - auto const dims = input->getDimensions(); - auto const isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }); - - if (profileCalib) - elemCount.push_back(volume(profileCalib->getDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT))); - else if (profile && isDynamicInput) - elemCount.push_back(volume(profile->getDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT))); - else - elemCount.push_back(volume(input->getDimensions())); - } - - config.setInt8Calibrator(new RndInt8Calibrator(1, elemCount, build.calibration, network, err)); - } - - if (build.directIO) - config.setFlag(nvinfer1::BuilderFlag::kDIRECT_IO); - - switch (build.precisionConstraints) - { - case PrecisionConstraints::kNONE: - // It's the default for TensorRT. - break; - case PrecisionConstraints::kOBEY: - config.setFlag(nvinfer1::BuilderFlag::kOBEY_PRECISION_CONSTRAINTS); - break; - case PrecisionConstraints::kPREFER: config.setFlag(nvinfer1::BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); break; - } - - if (!build.layerPrecisions.empty() && build.precisionConstraints != PrecisionConstraints::kNONE) - setLayerPrecisions(network, build.layerPrecisions); - - if (!build.layerOutputTypes.empty() && build.precisionConstraints != PrecisionConstraints::kNONE) - setLayerOutputTypes(network, build.layerOutputTypes); - - if (build.safe) - config.setEngineCapability(sys.DLACore != -1 ? nvinfer1::EngineCapability::kDLA_STANDALONE : nvinfer1::EngineCapability::kSAFETY); - - if (build.restricted) - config.setFlag(nvinfer1::BuilderFlag::kSAFETY_SCOPE); - - if (sys.DLACore != -1) - { - if (sys.DLACore < builder.getNbDLACores()) - { - config.setDefaultDeviceType(nvinfer1::DeviceType::kDLA); - config.setDLACore(sys.DLACore); - config.setFlag(nvinfer1::BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); - - if (sys.fallback) - config.setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK); - else // Reformatting runs on GPU, so avoid I/O reformatting - config.setFlag(nvinfer1::BuilderFlag::kDIRECT_IO); - if (!build.int8) - config.setFlag(nvinfer1::BuilderFlag::kFP16); - } - else - { - err << "Cannot create DLA engine, " << sys.DLACore << " not available" << std::endl; - return false; - } - } - - if (build.enabledTactics || build.disabledTactics) - { - nvinfer1::TacticSources tacticSources = config.getTacticSources(); - tacticSources |= build.enabledTactics; - tacticSources &= ~build.disabledTactics; - config.setTacticSources(tacticSources); - } - - return true; -} - -//! -//! \brief Create an engine for a network defintion -//! -//! \return Pointer to the engine created or nullptr if the creation failed -//! -bool networkToEngine(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - BuildEnvironment& env, std::ostream& err) -{ - TrtUniquePtr config{builder.createBuilderConfig()}; - std::vector> sparseWeights; - SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", false, err); - SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, *env.network, *config, err, sparseWeights), - "Network And Config setup failed", false, err); - - std::unique_ptr timingCache{nullptr}; - // Try to load cache from file. Create a fresh cache if the file doesn't exist - if (build.timingCacheMode == TimingCacheMode::kGLOBAL) - { - std::vector loadedCache = loadTimingCacheFile(build.timingCacheFile); - timingCache.reset(config->createTimingCache(static_cast(loadedCache.data()), loadedCache.size())); - SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed", false, err); - config->setTimingCache(*timingCache, false); - } - - // CUDA stream used for profiling by the builder. - auto profileStream = samplesCommon::makeCudaStream(); - SMP_RETVAL_IF_FALSE(profileStream != nullptr, "Cuda stream creation failed", false, err); - config->setProfileStream(*profileStream); - - TrtUniquePtr serializedEngine{builder.buildSerializedNetwork(*env.network, *config)}; - SMP_RETVAL_IF_FALSE(serializedEngine != nullptr, "Engine could not be created from network", false, err); - - env.engineBlob.resize(serializedEngine->size()); - std::memcpy(env.engineBlob.data(), serializedEngine->data(), serializedEngine->size()); - - if (build.safe) - { - ASSERT(sample::hasSafeRuntime()); - std::unique_ptr safeRuntime{sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(safeRuntime != nullptr, "SafeRuntime creation failed", false, err); - safeRuntime->setErrorRecorder(&gRecorder); - env.safeEngine.reset(safeRuntime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size())); - if (build.consistency) - checkSafeEngine(serializedEngine->data(), serializedEngine->size()); - - SMP_RETVAL_IF_FALSE(env.safeEngine != nullptr, "SafeEngine deserialization failed", false, err); - } - else - { - TrtUniquePtr runtime{nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(runtime != nullptr, "Runtime creation failed", false, err); - runtime->setErrorRecorder(&gRecorder); - env.engine.reset(runtime->deserializeCudaEngine(serializedEngine->data(), serializedEngine->size())); - SMP_RETVAL_IF_FALSE(env.engine != nullptr, "Engine deserialization failed", false, err); - if (build.timingCacheMode == TimingCacheMode::kGLOBAL) - { - auto const& timingCache = config->getTimingCache(); - std::unique_ptr timingCacheHostData{timingCache->serialize()}; - SMP_RETVAL_IF_FALSE(timingCacheHostData != nullptr, "Timing Cache serialization failed", false, err); - saveTimingCacheFile(build.timingCacheFile, timingCacheHostData.get()); - } - if (config->getInt8Calibrator()) - delete config->getInt8Calibrator(); - } - return true; -} - -//! -//! \brief Parse a given model, create a network and an engine. -//! -bool modelToBuildEnv( - const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, BuildEnvironment& env, std::ostream& err) -{ - TrtUniquePtr builder{nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", false, err); - builder->setErrorRecorder(&gRecorder); - auto networkFlags = (build.maxBatch) ? 0U : 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - - env.network.reset(builder->createNetworkV2(networkFlags)); - SMP_RETVAL_IF_FALSE(env.network != nullptr, "Network creation failed", false, err); - env.parser = modelToNetwork(model, *env.network, err); - SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false, err); - SMP_RETVAL_IF_FALSE(networkToEngine(build, sys, *builder, env, err), "Building engine failed", false, err); - return true; -} - -namespace -{ -std::pair, std::vector> getLayerWeightsRolePair(nvinfer1::IRefitter& refitter) -{ - // Get number of refittable items. - auto const nbAll = refitter.getAll(0, nullptr, nullptr); - std::vector layerNames(nbAll); - // Allocate buffers for the items and get them. - std::vector weightsRoles(nbAll); - refitter.getAll(nbAll, layerNames.data(), weightsRoles.data()); - std::vector layerNameStrs(nbAll); - std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { - if (name == nullptr) - return std::string{}; - - return std::string{name}; - }); - return {layerNameStrs, weightsRoles}; -} - -std::pair, std::vector> getMissingLayerWeightsRolePair(nvinfer1::IRefitter& refitter) -{ - // Get number of refittable items. - auto const nbMissing = refitter.getMissing(0, nullptr, nullptr); - std::vector layerNames(nbMissing); - // Allocate buffers for the items and get them. - std::vector weightsRoles(nbMissing); - refitter.getMissing(nbMissing, layerNames.data(), weightsRoles.data()); - std::vector layerNameStrs(nbMissing); - std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { - if (name == nullptr) - return std::string{}; - return std::string{name}; - }); - return {layerNameStrs, weightsRoles}; -} - -bool loadEngineToEnv(const std::string& engine, int DLACore, bool safe, bool enableConsistency, BuildEnvironment& env, std::ostream& err) -{ - std::ifstream engineFile(engine, std::ios::binary); - SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error opening engine file: " << engine); - engineFile.seekg(0, std::ifstream::end); - int64_t fsize = engineFile.tellg(); - engineFile.seekg(0, std::ifstream::beg); - - env.engineBlob.resize(fsize); - engineFile.read(reinterpret_cast(env.engineBlob.data()), fsize); - SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error loading engine file: " << engine); - - if (safe) - { - ASSERT(sample::hasSafeRuntime()); - std::unique_ptr safeRuntime{sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - safeRuntime->setErrorRecorder(&gRecorder); - env.safeEngine.reset(safeRuntime->deserializeCudaEngine(env.engineBlob.data(), fsize)); - bool result = env.safeEngine != nullptr; - if (result && enableConsistency) - { - checkSafeEngine(env.engineBlob.data(), fsize); - } - return result; - } - - TrtUniquePtr runtime{nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())}; - if (DLACore != -1) - runtime->setDLACore(DLACore); - - runtime->setErrorRecorder(&gRecorder); - env.engine.reset(runtime->deserializeCudaEngine(env.engineBlob.data(), fsize)); - return env.engine != nullptr; -} -} // namespace - -void dumpRefittable(nvinfer1::ICudaEngine& engine) -{ - TrtUniquePtr refitter{nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger())}; - if (refitter == nullptr) - { - sample::gLogError << "Failed to create a refitter." << std::endl; - return; - } - - auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); - auto const& layerNames = layerWeightsRolePair.first; - auto const& weightsRoles = layerWeightsRolePair.second; - auto const nbAll = layerWeightsRolePair.first.size(); - for (size_t i = 0; i < nbAll; ++i) - { - sample::gLogInfo << layerNames[i] << " " << weightsRoles[i] << std::endl; - } -} - -nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore, std::ostream& err) -{ - BuildEnvironment env; - return loadEngineToEnv(engine, DLACore, false, false, env, err) ? env.engine.release() : nullptr; -} - -bool saveEngine(const nvinfer1::ICudaEngine& engine, const std::string& fileName, std::ostream& err) -{ - std::ofstream engineFile(fileName, std::ios::binary); - if (!engineFile) - { - err << "Cannot open engine file: " << fileName << std::endl; - return false; - } - - TrtUniquePtr serializedEngine{engine.serialize()}; - if (serializedEngine == nullptr) - { - err << "Engine serialization failed" << std::endl; - return false; - } - - engineFile.write(static_cast(serializedEngine->data()), serializedEngine->size()); - return !engineFile.fail(); -} - -bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, - BuildEnvironment& env, std::ostream& err) -{ - TrtUniquePtr engine; - TrtUniquePtr network; - Parser parser; - - bool createEngineSuccess {false}; - - if (build.load) - createEngineSuccess = loadEngineToEnv(build.engine, sys.DLACore, build.safe, build.consistency, env, err); - else - createEngineSuccess = modelToBuildEnv(model, build, sys, env, err); - - SMP_RETVAL_IF_FALSE(createEngineSuccess, "Failed to create engine from model.", false, err); - - if (build.save) - { - std::ofstream engineFile(build.engine, std::ios::binary); - engineFile.write(reinterpret_cast(env.engineBlob.data()), env.engineBlob.size()); - SMP_RETVAL_IF_FALSE(!engineFile.fail(), "Saving engine to file failed.", false, err); - } - return true; -} - -nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - nvinfer1::INetworkDefinition& network, std::ostream& err) -{ - TrtUniquePtr config{builder.createBuilderConfig()}; - std::vector> sparseWeights; - SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", nullptr, err); - SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, network, *config, err, sparseWeights), - "Network And Config setup failed", nullptr, err); - return builder.buildSerializedNetwork(network, *config); -} - -nvinfer1::IHostMemory* modelToSerialized( - const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err) -{ - TrtUniquePtr builder{nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", nullptr, err); - builder->setErrorRecorder(&gRecorder); - - auto networkFlags - = (build.maxBatch) ? 0U : 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - - TrtUniquePtr network{builder->createNetworkV2(networkFlags)}; - SMP_RETVAL_IF_FALSE(network != nullptr, "Network creation failed", nullptr, err); - - Parser parser = modelToNetwork(model, *network, err); - SMP_RETVAL_IF_FALSE(parser.operator bool(), "Parsing model failed", nullptr, err); - - return networkToSerialized(build, sys, *builder, *network, err); -} - -bool serializeAndSave(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err) -{ - TrtUniquePtr serialized{modelToSerialized(model, build, sys, err)}; - SMP_RETVAL_IF_FALSE(serialized != nullptr, "Network serialization failed", false, err); - - std::ofstream engineFile(build.engine, std::ios::binary); - SMP_RETVAL_IF_FALSE(!!engineFile, "Cannot open a file to save a serialize network", false, err); - engineFile.write(static_cast(serialized->data()), serialized->size()); - return !engineFile.fail(); -} - -// There is not a getWeightsName API, so we need to use WeightsRole. -std::vector> getAllRefitWeightsForLayer(const nvinfer1::ILayer& l) -{ - switch (l.getType()) - { - case nvinfer1::LayerType::kCONSTANT: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kCONSTANT, layer.getWeights())}; - } - case nvinfer1::LayerType::kCONVOLUTION: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(nvinfer1::WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case nvinfer1::LayerType::kDECONVOLUTION: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(nvinfer1::WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case nvinfer1::LayerType::kFULLY_CONNECTED: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(nvinfer1::WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case nvinfer1::LayerType::kSCALE: - { - const auto& layer = static_cast(l); - return {std::make_pair(nvinfer1::WeightsRole::kSCALE, layer.getScale()), - std::make_pair(nvinfer1::WeightsRole::kSHIFT, layer.getShift())}; - } - case nvinfer1::LayerType::kRNN_V2: - case nvinfer1::LayerType::kACTIVATION: - case nvinfer1::LayerType::kPOOLING: - case nvinfer1::LayerType::kLRN: - case nvinfer1::LayerType::kSOFTMAX: - case nvinfer1::LayerType::kSHUFFLE: - case nvinfer1::LayerType::kCONCATENATION: - case nvinfer1::LayerType::kELEMENTWISE: - case nvinfer1::LayerType::kPLUGIN: - case nvinfer1::LayerType::kUNARY: - case nvinfer1::LayerType::kPADDING: - case nvinfer1::LayerType::kREDUCE: - case nvinfer1::LayerType::kTOPK: - case nvinfer1::LayerType::kGATHER: - case nvinfer1::LayerType::kMATRIX_MULTIPLY: - case nvinfer1::LayerType::kRAGGED_SOFTMAX: - case nvinfer1::LayerType::kIDENTITY: - case nvinfer1::LayerType::kPLUGIN_V2: - case nvinfer1::LayerType::kSLICE: - case nvinfer1::LayerType::kFILL: - case nvinfer1::LayerType::kSHAPE: - case nvinfer1::LayerType::kPARAMETRIC_RELU: - case nvinfer1::LayerType::kRESIZE: - case nvinfer1::LayerType::kTRIP_LIMIT: - case nvinfer1::LayerType::kRECURRENCE: - case nvinfer1::LayerType::kITERATOR: - case nvinfer1::LayerType::kLOOP_OUTPUT: - case nvinfer1::LayerType::kSELECT: - case nvinfer1::LayerType::kQUANTIZE: - case nvinfer1::LayerType::kDEQUANTIZE: - case nvinfer1::LayerType::kCONDITION: - case nvinfer1::LayerType::kCONDITIONAL_INPUT: - case nvinfer1::LayerType::kCONDITIONAL_OUTPUT: - case nvinfer1::LayerType::kSCATTER: - case nvinfer1::LayerType::kEINSUM: - case nvinfer1::LayerType::kASSERTION: return {}; - } - return {}; -} - -bool timeRefit(nvinfer1::INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, bool multiThreading) -{ - using time_point = std::chrono::time_point; - using durationMs = std::chrono::duration; - - auto const nbLayers = network.getNbLayers(); - TrtUniquePtr refitter{nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger())}; - // Set max threads that can be used by refitter. - if (multiThreading && !refitter->setMaxThreads(10)) - { - sample::gLogError << "Failed to set max threads to refitter." << std::endl; - return false; - } - auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); - // We use std::string instead of const char* since we can have copies of layer names. - std::set> layerRoleSet; - - auto const& layerNames = layerWeightsRolePair.first; - auto const& weightsRoles = layerWeightsRolePair.second; - - std::transform(layerNames.begin(), layerNames.end(), weightsRoles.begin(), - std::inserter(layerRoleSet, layerRoleSet.begin()), - [](std::string const& layerName, nvinfer1::WeightsRole const role) { return std::make_pair(layerName, role); }); - - auto const isRefittable = [&layerRoleSet](char const* layerName, nvinfer1::WeightsRole const role) { - return layerRoleSet.find(std::make_pair(layerName, role)) != layerRoleSet.end(); - }; - - auto const setWeights = [&] { - for (int32_t i = 0; i < nbLayers; i++) - { - auto const layer = network.getLayer(i); - auto const roleWeightsVec = getAllRefitWeightsForLayer(*layer); - for (auto const& roleWeights : roleWeightsVec) - { - if (isRefittable(layer->getName(), roleWeights.first)) - { - bool const success = refitter->setWeights(layer->getName(), roleWeights.first, roleWeights.second); - if (!success) - return false; - } - } - } - return true; - }; - - auto const reportMissingWeights = [&] { - auto const& missingPair = getMissingLayerWeightsRolePair(*refitter); - auto const& layerNames = missingPair.first; - auto const& weightsRoles = missingPair.second; - for (size_t i = 0; i < layerNames.size(); ++i) - { - sample::gLogError << "Missing (" << layerNames[i] << ", " << weightsRoles[i] << ") for refitting." - << std::endl; - } - return layerNames.empty(); - }; - - // Warm up and report missing weights - bool const success = setWeights() && reportMissingWeights() && refitter->refitCudaEngine(); - if (!success) - { - return false; - } - - constexpr int32_t loop = 10; - time_point const refitStartTime{std::chrono::steady_clock::now()}; - { - for (int32_t l = 0; l < loop; l++) - { - bool const success = setWeights() && refitter->refitCudaEngine(); - if (!success) - { - return false; - } - } - } - time_point const refitEndTime{std::chrono::steady_clock::now()}; - - sample::gLogInfo << "Engine refitted" - << " in " << durationMs(refitEndTime - refitStartTime).count() / loop << " ms." << std::endl; - return true; -} - -namespace -{ -void* initSafeRuntime() -{ - void* handle{nullptr}; -#if !defined(_WIN32) - std::string const dllName{samplesCommon::isDebug() ? "libnvinfer_safe_debug.so.8" : "libnvinfer_safe.so.8"}; -#if SANITIZER_BUILD - handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); -#else - handle = dlopen(dllName.c_str(), RTLD_LAZY); -#endif -#endif - return handle; -} - -void* initConsistencyCheckerLibrary() -{ - void* handle{nullptr}; -#if !defined(_WIN32) - std::string const dllName{samplesCommon::isDebug() ? "libnvinfer_checker_debug.so.8" : "libnvinfer_checker.so.8"}; -#if SANITIZER_BUILD - handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); -#else - handle = dlopen(dllName.c_str(), RTLD_LAZY); -#endif -#endif - return handle; -} - -#if !defined(_WIN32) -struct DllDeleter -{ - void operator()(void* handle) - { - if (handle != nullptr) - { - dlclose(handle); - } - } -}; -const std::unique_ptr safeRuntimeLibrary{initSafeRuntime()}; -const std::unique_ptr consistencyCheckerLibrary{initConsistencyCheckerLibrary()}; -#endif -} // namespace - -bool hasSafeRuntime() -{ - bool ret{false}; -#if !defined(_WIN32) - ret = (safeRuntimeLibrary != nullptr); -#endif - return ret; -} - -nvinfer1::safe::IRuntime* createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept -{ - nvinfer1::safe::IRuntime* runtime{nullptr}; -#if !defined(_WIN32) - constexpr char symbolName[] = "_ZN8nvinfer14safe18createInferRuntimeERNS_7ILoggerE"; - typedef nvinfer1::safe::IRuntime* (*CreateInferRuntimeFn)(nvinfer1::ILogger & logger); - if (hasSafeRuntime()) - { - auto createFn = reinterpret_cast(dlsym(safeRuntimeLibrary.get(), symbolName)); - if (createFn != nullptr) - { - runtime = createFn(logger); - } - } -#endif - return runtime; -} - -bool hasConsistencyChecker() -{ - bool ret{false}; -#if !defined(_WIN32) - ret = (consistencyCheckerLibrary != nullptr); -#endif - return ret; -} - -nvinfer1::consistency::IConsistencyChecker* createConsistencyChecker( - nvinfer1::ILogger& logger, void const* serializedEngine, int32_t const engineSize) noexcept -{ - nvinfer1::consistency::IConsistencyChecker* checker{nullptr}; - - if (serializedEngine == nullptr || engineSize == 0) - { - return checker; - } - -#if !defined(_WIN32) - constexpr char symbolName[] = "createConsistencyChecker_INTERNAL"; - typedef nvinfer1::consistency::IConsistencyChecker* (*CreateCheckerFn)( - nvinfer1::ILogger * logger, void const* data, size_t size, uint32_t version); - if (hasSafeRuntime()) - { - auto createFn = reinterpret_cast(dlsym(consistencyCheckerLibrary.get(), symbolName)); - if (createFn != nullptr) - { - checker = createFn(&logger, serializedEngine, engineSize, NV_TENSORRT_VERSION); - } - } -#endif - return checker; -} - -bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize) -{ - - if (!hasConsistencyChecker()) - { - sample::gLogError << "Cannot perform consistency check because the checker is not loaded.." << std::endl; - return false; - } - auto checker = std::unique_ptr( - createConsistencyChecker(sample::gLogger.getTRTLogger(), serializedEngine, engineSize)); - if (checker.get() == nullptr) - { - sample::gLogError << "Failed to create consistency checker." << std::endl; - return false; - } - sample::gLogInfo << "Start consistency checking." << std::endl; - if (!checker->validate()) - { - sample::gLogError << "Consistency validation failed." << std::endl; - return false; - } - sample::gLogInfo << "Consistency validation passed." << std::endl; - return true; -} -} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleEngines.h b/src/Detector/tensorrt_yolo/common_deprecated/sampleEngines.h deleted file mode 100644 index 620b51a1c..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleEngines.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_ENGINES_H -#define TRT_SAMPLE_ENGINES_H - -#include -#include - -#include "NvInfer.h" - -#if (NV_TENSORRT_MAJOR > 7) - -#include "NvInferConsistency.h" -#include "NvInferSafeRuntime.h" - -#endif - -#include "NvOnnxParser.h" -#include "sampleOptions.h" -#include "sampleUtils.h" - -namespace sample -{ - -struct Parser -{ - TrtUniquePtr onnxParser; - - operator bool() const - { - return onnxParser.operator bool(); - } -}; - -struct BuildEnvironment -{ - TrtUniquePtr network; - //! Parser that creates the network. Must be declared *after* network, so that when - //! ~BuildEnvironment() executes, the parser is destroyed before the network is destroyed. - Parser parser; - TrtUniquePtr engine; - std::unique_ptr safeEngine; - std::vector engineBlob; -}; - -//! -//! \brief Generate a network definition for a given model -//! -//! \return Parser The parser used to initialize the network and that holds the weights for the network, or an invalid -//! parser (the returned parser converts to false if tested) -//! -//! Constant input dimensions in the model must not be changed in the corresponding -//! network definition, because its correctness may rely on the constants. -//! -//! \see Parser::operator bool() -//! -Parser modelToNetwork(const ModelOptions& model, nvinfer1::INetworkDefinition& network, std::ostream& err); - -//! -//! \brief Set up network and config -//! -//! \return boolean Return true if network and config were successfully set -//! -bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - nvinfer1::INetworkDefinition& network, nvinfer1::IBuilderConfig& config, std::ostream& err, - std::vector>& sparseWeights); - -//! -//! \brief Log refittable layers and weights of a refittable engine -//! -void dumpRefittable(nvinfer1::ICudaEngine& engine); - -//! -//! \brief Load a serialized engine -//! -//! \return Pointer to the engine loaded or nullptr if the operation failed -//! -nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore, std::ostream& err); - -//! -//! \brief Save an engine into a file -//! -//! \return boolean Return true if the engine was successfully saved -//! -bool saveEngine(const nvinfer1::ICudaEngine& engine, const std::string& fileName, std::ostream& err); - -//! -//! \brief Create an engine from model or serialized file, and optionally save engine -//! -//! \return Pointer to the engine created or nullptr if the creation failed -//! -bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, - BuildEnvironment& env, std::ostream& err); - -//! -//! \brief Create an engine from model or serialized file, and optionally save engine -//! -//! \return Pointer to the engine created or nullptr if the creation failed -//! -inline TrtUniquePtr getEngine( - const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err) -{ - BuildEnvironment env; - TrtUniquePtr engine; - if (getEngineBuildEnv(model, build, sys, env, err)) - { - engine.swap(env.engine); - } - return engine; -} - -//! -//! \brief Create a serialized network -//! -//! \return Pointer to a host memory for a serialized network -//! -nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, - nvinfer1::INetworkDefinition& network, std::ostream& err); - -//! -//! \brief Tranfer model to a serialized network -//! -//! \return Pointer to a host memory for a serialized network -//! -nvinfer1::IHostMemory* modelToSerialized(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); - -//! -//! \brief Serialize network and save it into a file -//! -//! \return boolean Return true if the network was successfully serialized and saved -//! -bool serializeAndSave(const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); - -bool timeRefit(const nvinfer1::INetworkDefinition& network, nvinfer1::ICudaEngine& engine, bool multiThreading); - -//! -//! \brief Set tensor scales from a calibration table -//! -void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, const std::vector& inputFormats, - const std::vector& outputFormats, const std::string& calibrationFile); - -//! -//! \brief Check if safe runtime is loaded. -//! -bool hasSafeRuntime(); - -//! -//! \brief Create a safe runtime object if the dynamic library is loaded. -//! -nvinfer1::safe::IRuntime* createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept; - -//! -//! \brief Check if consistency checker is loaded. -//! -bool hasConsistencyChecker(); - -//! -//! \brief Create a consistency checker object if the dynamic library is loaded. -//! -nvinfer1::consistency::IConsistencyChecker* createConsistencyChecker( - nvinfer1::ILogger& logger, nvinfer1::IHostMemory const* engine) noexcept; - -//! -//! \brief Run consistency check on serialized engine. -//! -bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize); -} // namespace sample - -#endif // TRT_SAMPLE_ENGINES_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleInference.cpp b/src/Detector/tensorrt_yolo/common_deprecated/sampleInference.cpp deleted file mode 100644 index 51f168822..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleInference.cpp +++ /dev/null @@ -1,990 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(__QNX__) -#include -#include -#endif - -#include "NvInfer.h" - -#include "ErrorRecorder.h" -#include "logger.h" -#include "sampleDevice.h" -#include "sampleEngines.h" -#include "sampleInference.h" -#include "sampleOptions.h" -#include "sampleReporting.h" -#include "sampleUtils.h" - -namespace sample -{ - -template -bool validateTensorNames( - const MapType& map, const EngineType* engine, const int32_t endBindingIndex) -{ - // Check if the provided input tensor names match the input tensors of the engine. - // Throw an error if the provided input tensor names cannot be found because it implies a potential typo. - for (const auto& item : map) - { - bool tensorNameFound{false}; - for (int32_t b = 0; b < endBindingIndex; ++b) - { - if (engine->bindingIsInput(b) && engine->getBindingName(b) == item.first) - { - tensorNameFound = true; - break; - } - } - if (!tensorNameFound) - { - sample::gLogError << "Cannot find input tensor with name \"" << item.first << "\" in the engine bindings! " - << "Please make sure the input tensor names are correct." << std::endl; - return false; - } - } - return true; -} - -template -class FillBindingClosure -{ -private: - using InputsMap = std::unordered_map; - using BindingsVector = std::vector>; - - EngineType const* engine; - ContextType const* context; - InputsMap const& inputs; - BindingsVector& bindings; - int32_t batch; - int32_t endBindingIndex; - - void fillOneBinding(int32_t bindingIndex, int64_t vol) - { - auto const dims = getDims(bindingIndex); - auto const name = engine->getBindingName(bindingIndex); - auto const isInput = engine->bindingIsInput(bindingIndex); - auto const dataType = engine->getBindingDataType(bindingIndex); - auto const *bindingInOutStr = isInput ? "input" : "output"; - for (auto& binding : bindings) - { - const auto input = inputs.find(name); - if (isInput && input != inputs.end()) - { - sample::gLogInfo << "Using values loaded from " << input->second << " for input " << name << std::endl; - binding->addBinding(bindingIndex, name, isInput, vol, dataType, input->second); - } - else - { - sample::gLogInfo << "Using random values for " << bindingInOutStr << " " << name << std::endl; - binding->addBinding(bindingIndex, name, isInput, vol, dataType); - } - sample::gLogInfo << "Created " << bindingInOutStr <<" binding for " << name << " with dimensions " << dims << std::endl; - } - } - - bool fillAllBindings(int32_t batch, int32_t endBindingIndex) - { - if (!validateTensorNames(inputs, engine, endBindingIndex)) - { - sample::gLogError << "Invalid tensor names found in --loadInputs flag." << std::endl; - return false; - } - - for (int32_t b = 0; b < endBindingIndex; b++) - { - auto const dims = getDims(b); - auto const comps = engine->getBindingComponentsPerElement(b); - auto const strides = context->getStrides(b); - int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b); - auto const vol = volume(dims, strides, vectorDimIndex, comps, batch); - fillOneBinding(b, vol); - } - return true; - } - - nvinfer1::Dims getDims(int32_t bindingIndex); - -public: - FillBindingClosure(EngineType const* _engine, ContextType const* _context, InputsMap const& _inputs, BindingsVector& _bindings, int32_t _batch, int32_t _endBindingIndex) - : engine(_engine) - , context(_context) - , inputs(_inputs) - , bindings(_bindings) - , batch(_batch) - , endBindingIndex(_endBindingIndex) - { - } - - bool operator()() - { - return fillAllBindings(batch, endBindingIndex); - } -}; - -template <> -nvinfer1::Dims FillBindingClosure::getDims(int32_t bindingIndex) -{ - return context->getBindingDimensions(bindingIndex); -} - -template <> -nvinfer1::Dims FillBindingClosure::getDims(int32_t bindingIndex) -{ - return engine->getBindingDimensions(bindingIndex); -} - -bool setUpInference(InferenceEnvironment& iEnv, const InferenceOptions& inference) -{ - int32_t device{}; - cudaCheck(cudaGetDevice(&device)); - - cudaDeviceProp properties; - cudaCheck(cudaGetDeviceProperties(&properties, device)); - // Use managed memory on integrated devices when transfers are skipped - // and when it is explicitly requested on the commandline. - bool useManagedMemory{(inference.skipTransfers && properties.integrated) || inference.useManaged}; - using FillSafeBindings = FillBindingClosure; - if (iEnv.safe) - { - ASSERT(sample::hasSafeRuntime()); - auto* safeEngine = iEnv.safeEngine.get(); - for (int32_t s = 0; s < inference.streams; ++s) - { - iEnv.safeContext.emplace_back(safeEngine->createExecutionContext()); - iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); - } - const int32_t nBindings = safeEngine->getNbBindings(); - auto const* safeContext = iEnv.safeContext.front().get(); - // batch is set to 1 because safety only support explicit batch. - return FillSafeBindings(iEnv.safeEngine.get(), safeContext, inference.inputs, iEnv.bindings, 1, nBindings)(); - } - - using FillStdBindings = FillBindingClosure; - - for (int32_t s = 0; s < inference.streams; ++s) - { - auto ec = iEnv.engine->createExecutionContext(); - if (ec == nullptr) - { - sample::gLogError << "Unable to create execution context for stream " << s << "." << std::endl; - return false; - } - iEnv.context.emplace_back(ec); - iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); - } - if (iEnv.profiler) - { - iEnv.context.front()->setProfiler(iEnv.profiler.get()); - // Always run reportToProfiler() after enqueue launch - iEnv.context.front()->setEnqueueEmitsProfile(false); - } - - const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles(); - const int32_t nBindings = iEnv.engine->getNbBindings(); - const int32_t bindingsInProfile = nOptProfiles > 0 ? nBindings / nOptProfiles : 0; - const int32_t endBindingIndex = bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings(); - - if (nOptProfiles > 1) - { - sample::gLogWarning << "Multiple profiles are currently not supported. Running with one profile." << std::endl; - } - - // Make sure that the tensor names provided in command-line args actually exist in any of the engine bindings - // to avoid silent typos. - if (!validateTensorNames(inference.shapes, iEnv.engine.get(), endBindingIndex)) - { - sample::gLogError << "Invalid tensor names found in --shapes flag." << std::endl; - return false; - } - - // Set all input dimensions before all bindings can be allocated - for (int32_t b = 0; b < endBindingIndex; ++b) - { - if (iEnv.engine->bindingIsInput(b)) - { - auto dims = iEnv.context.front()->getBindingDimensions(b); - const bool isScalar = dims.nbDims == 0; - const bool isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }) - || iEnv.engine->isShapeBinding(b); - if (isDynamicInput) - { - auto shape = inference.shapes.find(iEnv.engine->getBindingName(b)); - - std::vector staticDims; - if (shape == inference.shapes.end()) - { - // If no shape is provided, set dynamic dimensions to 1. - constexpr int32_t DEFAULT_DIMENSION = 1; - if (iEnv.engine->isShapeBinding(b)) - { - if (isScalar) - { - staticDims.push_back(1); - } - else - { - staticDims.resize(dims.d[0]); - std::fill(staticDims.begin(), staticDims.end(), DEFAULT_DIMENSION); - } - } - else - { - staticDims.resize(dims.nbDims); - std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), - [&](int32_t dimension) { return dimension >= 0 ? dimension : DEFAULT_DIMENSION; }); - } - sample::gLogWarning << "Dynamic dimensions required for input: " << iEnv.engine->getBindingName(b) - << ", but no shapes were provided. Automatically overriding shape to: " - << staticDims << std::endl; - } - else if (inference.inputs.count(shape->first) && iEnv.engine->isShapeBinding(b)) - { - if (isScalar || dims.nbDims == 1) - { - // Load shape tensor from file. - size_t const size = isScalar ? 1 : dims.d[0]; - staticDims.resize(size); - auto const& filename = inference.inputs.at(shape->first); - auto dst = reinterpret_cast(staticDims.data()); - loadFromFile(filename, dst, size * sizeof(decltype(staticDims)::value_type)); - } - else - { - sample::gLogWarning << "Cannot load shape tensor " << shape->first << " from file, " - << "ND-Shape isn't supported yet" << std::endl; - // Fallback - staticDims = shape->second; - } - } - else - { - staticDims = shape->second; - } - - for (auto& c : iEnv.context) - { - if (iEnv.engine->isShapeBinding(b)) - { - if (!c->setInputShapeBinding(b, staticDims.data())) - { - return false; - } - } - else - { - if (!c->setBindingDimensions(b, toDims(staticDims))) - { - return false; - } - } - } - } - } - } - - auto* engine = iEnv.engine.get(); - auto const* context = iEnv.context.front().get(); - int32_t const batch = engine->hasImplicitBatchDimension() ? inference.batch : 1; - return FillStdBindings(engine, context, inference.inputs, iEnv.bindings, batch, endBindingIndex)(); -} - -namespace -{ - -#if defined(__QNX__) -using TimePoint = double; -#else -using TimePoint = std::chrono::time_point; -#endif - -TimePoint getCurrentTime() -{ -#if defined(__QNX__) - uint64_t const currentCycles = ClockCycles(); - uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec; - // Return current timestamp in ms. - return static_cast(currentCycles) * 1000. / cyclesPerSecond; -#else - return std::chrono::high_resolution_clock::now(); -#endif -} - -//! -//! \struct SyncStruct -//! \brief Threads synchronization structure -//! -struct SyncStruct -{ - std::mutex mutex; - TrtCudaStream mainStream; - TrtCudaEvent gpuStart{cudaEventBlockingSync}; - TimePoint cpuStart{}; - float sleep{}; -}; - -struct Enqueue -{ - explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers) - : mContext(context) - , mBuffers(buffers) - { - } - - nvinfer1::IExecutionContext& mContext; - void** mBuffers{}; -}; - -//! -//! \class EnqueueImplicit -//! \brief Functor to enqueue inference with implict batch -//! -class EnqueueImplicit : private Enqueue -{ - -public: - explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers, int32_t batch) - : Enqueue(context, buffers) - , mBatch(batch) - { - } - - bool operator()(TrtCudaStream& stream) const - { - if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) - { - // Collecting layer timing info from current profile index of execution context - if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && !mContext.reportToProfiler()) - { - gLogWarning << "Failed to collect layer timing info from previous enqueue()" << std::endl; - } - return true; - } - return false; - } - -private: - int32_t mBatch; -}; - -//! -//! \class EnqueueExplicit -//! \brief Functor to enqueue inference with explict batch -//! -class EnqueueExplicit : private Enqueue -{ - -public: - explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers) - : Enqueue(context, buffers) - { - } - - bool operator()(TrtCudaStream& stream) const - { - if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) - { - // Collecting layer timing info from current profile index of execution context - if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && !mContext.reportToProfiler()) - { - gLogWarning << "Failed to collect layer timing info from previous enqueueV2()" << std::endl; - } - return true; - } - return false; - } -}; - -//! -//! \class EnqueueGraph -//! \brief Functor to enqueue inference from CUDA Graph -//! -class EnqueueGraph -{ - -public: - explicit EnqueueGraph(nvinfer1::IExecutionContext& context, TrtCudaGraph& graph) - : mGraph(graph) - , mContext(context) - { - } - - bool operator()(TrtCudaStream& stream) const - { - if (mGraph.launch(stream)) - { - // Collecting layer timing info from current profile index of execution context - if (mContext.getProfiler() && !mContext.reportToProfiler()) - { - gLogWarning << "Failed to collect layer timing info from previous CUDA graph launch" << std::endl; - } - return true; - } - return false; - } - - TrtCudaGraph& mGraph; - nvinfer1::IExecutionContext& mContext; -}; - -//! -//! \class EnqueueSafe -//! \brief Functor to enqueue safe execution context -//! -class EnqueueSafe -{ -public: - explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context, void** buffers) - : mContext(context) - , mBuffers(buffers) - { - } - - bool operator()(TrtCudaStream& stream) const - { - if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) - { - return true; - } - return false; - } - - nvinfer1::safe::IExecutionContext& mContext; - void** mBuffers{}; -}; - -using EnqueueFunction = std::function; - -enum class StreamType : int32_t -{ - kINPUT = 0, - kCOMPUTE = 1, - kOUTPUT = 2, - kNUM = 3 -}; - -enum class EventType : int32_t -{ - kINPUT_S = 0, - kINPUT_E = 1, - kCOMPUTE_S = 2, - kCOMPUTE_E = 3, - kOUTPUT_S = 4, - kOUTPUT_E = 5, - kNUM = 6 -}; - -using MultiStream = std::array(StreamType::kNUM)>; - -using MultiEvent = std::array, static_cast(EventType::kNUM)>; - -using EnqueueTimes = std::array; - -//! -//! \class Iteration -//! \brief Inference iteration and streams management -//! -template -class Iteration -{ - -public: - Iteration(int32_t id, const InferenceOptions& inference, ContextType& context, Bindings& bindings) - : mBindings(bindings) - , mStreamId(id) - , mDepth(1 + inference.overlap) - , mActive(mDepth) - , mEvents(mDepth) - , mEnqueueTimes(mDepth) - , mContext(&context) - { - for (int32_t d = 0; d < mDepth; ++d) - { - for (int32_t e = 0; e < static_cast(EventType::kNUM); ++e) - { - mEvents[d][e].reset(new TrtCudaEvent(!inference.spin)); - } - } - createEnqueueFunction(inference, context, bindings); - } - - bool query(bool skipTransfers) - { - if (mActive[mNext]) - { - return true; - } - - if (!skipTransfers) - { - record(EventType::kINPUT_S, StreamType::kINPUT); - mBindings.transferInputToDevice(getStream(StreamType::kINPUT)); - record(EventType::kINPUT_E, StreamType::kINPUT); - wait(EventType::kINPUT_E, StreamType::kCOMPUTE); // Wait for input DMA before compute - } - - record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE); - recordEnqueueTime(); - if (!mEnqueue(getStream(StreamType::kCOMPUTE))) - { - return false; - } - recordEnqueueTime(); - record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE); - - if (!skipTransfers) - { - wait(EventType::kCOMPUTE_E, StreamType::kOUTPUT); // Wait for compute before output DMA - record(EventType::kOUTPUT_S, StreamType::kOUTPUT); - mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT)); - record(EventType::kOUTPUT_E, StreamType::kOUTPUT); - } - - mActive[mNext] = true; - moveNext(); - return true; - } - - float sync( - const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, std::vector& trace, bool skipTransfers) - { - if (mActive[mNext]) - { - if (skipTransfers) - { - getEvent(EventType::kCOMPUTE_E).synchronize(); - } - else - { - getEvent(EventType::kOUTPUT_E).synchronize(); - } - trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers)); - mActive[mNext] = false; - return getEvent(EventType::kCOMPUTE_S) - gpuStart; - } - return 0; - } - - void syncAll( - const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, std::vector& trace, bool skipTransfers) - { - for (int32_t d = 0; d < mDepth; ++d) - { - sync(cpuStart, gpuStart, trace, skipTransfers); - moveNext(); - } - } - - void wait(TrtCudaEvent& gpuStart) - { - getStream(StreamType::kINPUT).wait(gpuStart); - } - - void setInputData() - { - mBindings.transferInputToDevice(getStream(StreamType::kINPUT)); - } - - void fetchOutputData() - { - mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT)); - } - -private: - void moveNext() - { - mNext = mDepth - 1 - mNext; - } - - TrtCudaStream& getStream(StreamType t) - { - return mStream[static_cast(t)]; - } - - TrtCudaEvent& getEvent(EventType t) - { - return *mEvents[mNext][static_cast(t)]; - } - - void record(EventType e, StreamType s) - { - getEvent(e).record(getStream(s)); - } - - void recordEnqueueTime() - { - mEnqueueTimes[mNext][enqueueStart] = getCurrentTime(); - enqueueStart = 1 - enqueueStart; - } - - TimePoint getEnqueueTime(bool start) - { - return mEnqueueTimes[mNext][start ? 0 : 1]; - } - - void wait(EventType e, StreamType s) - { - getStream(s).wait(getEvent(e)); - } - - InferenceTrace getTrace(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, bool skipTransfers) - { - float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_S) - gpuStart; - float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_E) - gpuStart; - float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_S) - gpuStart; - float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_E) - gpuStart; - - return InferenceTrace(mStreamId, - std::chrono::duration(getEnqueueTime(true) - cpuStart).count(), - std::chrono::duration(getEnqueueTime(false) - cpuStart).count(), is, ie, - getEvent(EventType::kCOMPUTE_S) - gpuStart, getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe); - } - - void createEnqueueFunction(const InferenceOptions& inference, nvinfer1::IExecutionContext& context, Bindings& /*bindings*/) - { - if (inference.batch) - mEnqueue = EnqueueFunction(EnqueueImplicit(context, mBindings.getDeviceBuffers(), inference.batch)); - else - mEnqueue = EnqueueFunction(EnqueueExplicit(context, mBindings.getDeviceBuffers())); - - if (inference.graph) - { - TrtCudaStream& stream = getStream(StreamType::kCOMPUTE); - // Avoid capturing initialization calls by executing the enqueue function at least once before starting CUDA graph capture. - const auto ret = mEnqueue(stream); - assert(ret); - stream.synchronize(); - - mGraph.beginCapture(stream); - // The built TRT engine may contain operations that are not permitted under CUDA graph capture mode. - // When the stream is capturing, the enqueue call may return false if the current CUDA graph capture fails. - if (mEnqueue(stream)) - { - mGraph.endCapture(stream); - mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph)); - } - else - { - mGraph.endCaptureOnError(stream); - // Ensure any CUDA error has been cleaned up. - cudaCheck(cudaGetLastError()); - sample::gLogWarning << "The built TensorRT engine contains operations that are not permitted under " - "CUDA graph capture mode." - << std::endl; - sample::gLogWarning << "The specified --useCudaGraph flag has been ignored. The inference will be " - "launched without using CUDA graph launch." - << std::endl; - } - } - } - - void createEnqueueFunction(const InferenceOptions&, nvinfer1::safe::IExecutionContext& context, Bindings&) - { - mEnqueue = EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers())); - } - - Bindings& mBindings; - - TrtCudaGraph mGraph; - EnqueueFunction mEnqueue; - - int32_t mStreamId{0}; - int32_t mNext{0}; - int32_t mDepth{2}; // default to double buffer to hide DMA transfers - - std::vector mActive; - MultiStream mStream; - std::vector mEvents; - - int32_t enqueueStart{0}; - std::vector mEnqueueTimes; - ContextType* mContext{nullptr}; -}; - -template -bool inferenceLoop(std::vector>>& iStreams, const TimePoint& cpuStart, - const TrtCudaEvent& gpuStart, int iterations, float maxDurationMs, float warmupMs, - std::vector& trace, bool skipTransfers, float idleMs) -{ - float durationMs = 0; - int32_t skip = 0; - - for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs; ++i) - { - for (auto& s : iStreams) - { - if (!s->query(skipTransfers)) - return false; - } - for (auto& s : iStreams) - { - durationMs = std::max(durationMs, s->sync(cpuStart, gpuStart, trace, skipTransfers)); - } - if (durationMs < warmupMs) // Warming up - { - if (durationMs) // Skip complete iterations - ++skip; - - continue; - } - if (idleMs != 0.F) - std::this_thread::sleep_for(std::chrono::duration(idleMs)); - } - for (auto& s : iStreams) - { - s->syncAll(cpuStart, gpuStart, trace, skipTransfers); - } - return true; -} - -template -void inferenceExecution(const InferenceOptions& inference, InferenceEnvironment& iEnv, SyncStruct& sync, - const int32_t threadIdx, const int32_t streamsPerThread, int32_t device, std::vector& trace) -{ - float warmupMs = inference.warmup; - float durationMs = inference.duration * 1000.F + warmupMs; - - cudaCheck(cudaSetDevice(device)); - - std::vector>> iStreams; - - for (int32_t s = 0; s < streamsPerThread; ++s) - { - const int32_t streamId{threadIdx * streamsPerThread + s}; - auto* iteration = new Iteration( - streamId, inference, *iEnv.template getContext(streamId), *iEnv.bindings[streamId]); - if (inference.skipTransfers) - { - iteration->setInputData(); - } - iStreams.emplace_back(iteration); - } - - for (auto& s : iStreams) - { - s->wait(sync.gpuStart); - } - - std::vector localTrace; - if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart, inference.iterations, durationMs, warmupMs, localTrace, - inference.skipTransfers, inference.idle)) - { - iEnv.error = true; - } - - if (inference.skipTransfers) - { - for (auto& s : iStreams) - { - s->fetchOutputData(); - } - } - - sync.mutex.lock(); - trace.insert(trace.end(), localTrace.begin(), localTrace.end()); - sync.mutex.unlock(); -} - -inline std::thread makeThread(const InferenceOptions& inference, InferenceEnvironment& iEnv, SyncStruct& sync, - int32_t threadIdx, int32_t streamsPerThread, int32_t device, std::vector& trace) -{ - - if (iEnv.safe) - { - ASSERT(sample::hasSafeRuntime()); - return std::thread(inferenceExecution, std::cref(inference), std::ref(iEnv), - std::ref(sync), threadIdx, streamsPerThread, device, std::ref(trace)); - } - - return std::thread(inferenceExecution, std::cref(inference), std::ref(iEnv), - std::ref(sync), threadIdx, streamsPerThread, device, std::ref(trace)); -} - -} // namespace - -bool runInference( - const InferenceOptions& inference, InferenceEnvironment& iEnv, int32_t device, std::vector& trace) -{ - cudaCheck(cudaProfilerStart()); - - trace.resize(0); - - SyncStruct sync; - sync.sleep = inference.sleep; - sync.mainStream.sleep(&sync.sleep); - sync.cpuStart = getCurrentTime(); - sync.gpuStart.record(sync.mainStream); - - // When multiple streams are used, trtexec can run inference in two modes: - // (1) if inference.threads is true, then run each stream on each thread. - // (2) if inference.threads is false, then run all streams on the same thread. - const int32_t numThreads = inference.threads ? inference.streams : 1; - const int32_t streamsPerThread = inference.threads ? 1 : inference.streams; - - std::vector threads; - for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) - { - threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx, streamsPerThread, device, trace)); - } - for (auto& th : threads) - { - th.join(); - } - - cudaCheck(cudaProfilerStop()); - - auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) { return a.h2dStart < b.h2dStart; }; - std::sort(trace.begin(), trace.end(), cmpTrace); - - return !iEnv.error; -} - -namespace -{ -size_t reportGpuMemory() -{ - static size_t prevFree{0}; - size_t free{0}; - size_t total{0}; - size_t newlyAllocated{0}; - cudaCheck(cudaMemGetInfo(&free, &total)); - sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB"; - if (prevFree != 0) - { - newlyAllocated = (prevFree - free); - sample::gLogInfo << ", newly allocated GPU memory = " << newlyAllocated / 1024.0_MiB << " GiB"; - } - sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB" << std::endl; - prevFree = free; - return newlyAllocated; -} -} // namespace - -//! Returns true if deserialization is slower than expected or fails. -bool timeDeserialize(InferenceEnvironment& iEnv) -{ - constexpr int32_t kNB_ITERS{20}; - std::unique_ptr rt{nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())}; - std::unique_ptr engine; - - std::unique_ptr safeRT{sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - std::unique_ptr safeEngine; - - if (iEnv.safe) - { - ASSERT(sample::hasSafeRuntime() && safeRT != nullptr); - safeRT->setErrorRecorder(&gRecorder); - } - - auto timeDeserializeFn = [&]() -> float { - bool deserializeOK{false}; - engine.reset(nullptr); - safeEngine.reset(nullptr); - auto startClock = std::chrono::high_resolution_clock::now(); - if (iEnv.safe) - { - safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(), iEnv.engineBlob.size())); - deserializeOK = (safeEngine != nullptr); - } - else - { - engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(), iEnv.engineBlob.size())); - deserializeOK = (engine != nullptr); - } - auto endClock = std::chrono::high_resolution_clock::now(); - // return NAN if deserialization failed. - return deserializeOK ? std::chrono::duration(endClock - startClock).count() : NAN; - }; - - // Warmup the caches to make sure that cache thrashing isn't throwing off the results - { - sample::gLogInfo << "Begin deserialization warmup..." << std::endl; - for (int32_t i = 0, e = 2; i < e; ++i) - { - timeDeserializeFn(); - } - } - sample::gLogInfo << "Begin deserialization engine timing..." << std::endl; - float const first = timeDeserializeFn(); - - // Check if first deserialization suceeded. - if (std::isnan(first)) - { - sample::gLogError << "Engine deserialization failed." << std::endl; - return true; - } - - sample::gLogInfo << "First deserialization time = " << first << " milliseconds" << std::endl; - - // Record initial gpu memory state. - reportGpuMemory(); - - float totalTime{0.F}; - for (int32_t i = 0; i < kNB_ITERS; ++i) - { - totalTime += timeDeserializeFn(); - } - const auto averageTime = totalTime / kNB_ITERS; - // reportGpuMemory sometimes reports zero after a single deserialization of a small engine, - // so use the size of memory for all the iterations. - const auto totalEngineSizeGpu = reportGpuMemory(); - sample::gLogInfo << "Total deserialization time = " << totalTime << " milliseconds in " << kNB_ITERS - << " iterations, average time = " << averageTime << " milliseconds, first time = " << first - << " milliseconds." << std::endl; - sample::gLogInfo << "Deserialization Bandwidth = " << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s" << std::endl; - - // If the first deserialization is more than tolerance slower than - // the average deserialization, return true, which means an error occurred. - // The tolerance is set to 2x since the deserialization time is quick and susceptible - // to caching issues causing problems in the first timing. - const auto tolerance = 2.0F; - const bool isSlowerThanExpected = first > averageTime * tolerance; - if (isSlowerThanExpected) - { - sample::gLogInfo << "First deserialization time divided by average time is " << (first / averageTime) - << ". Exceeds tolerance of " << tolerance << "x." << std::endl; - } - return isSlowerThanExpected; -} - -std::string getLayerInformation(const InferenceEnvironment& iEnv, nvinfer1::LayerInformationFormat format) -{ - auto runtime = std::unique_ptr(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())); - auto inspector = std::unique_ptr(iEnv.engine->createEngineInspector()); - if (!iEnv.context.empty()) - { - inspector->setExecutionContext(iEnv.context.front().get()); - } - std::string result = inspector->getEngineInformation(format); - return result; -} - -} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleInference.h b/src/Detector/tensorrt_yolo/common_deprecated/sampleInference.h deleted file mode 100644 index 1c21f592f..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleInference.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_INFERENCE_H -#define TRT_SAMPLE_INFERENCE_H - -#include "sampleReporting.h" -#include "sampleUtils.h" - -#include -#include -#include -#include - -#include "NvInfer.h" - -#if (NV_TENSORRT_MAJOR > 7) - -#include "NvInferSafeRuntime.h" - -namespace sample -{ - -struct InferenceEnvironment -{ - TrtUniquePtr engine; - std::unique_ptr profiler; - std::vector> context; - std::vector> bindings; - bool error{false}; - - std::vector engineBlob; - - bool safe{false}; - std::unique_ptr safeEngine; - std::vector> safeContext; - - template - inline ContextType* getContext(int32_t streamIdx); -}; - -template <> -inline nvinfer1::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx) -{ - return context[streamIdx].get(); -} - -template <> -inline nvinfer1::safe::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx) -{ - return safeContext[streamIdx].get(); -} - -//! -//! \brief Set up contexts and bindings for inference -//! -bool setUpInference(InferenceEnvironment& iEnv, const InferenceOptions& inference); - -//! -//! \brief Deserialize the engine and time how long it takes. -//! -bool timeDeserialize(InferenceEnvironment& iEnv); - -//! -//! \brief Run inference and collect timing, return false if any error hit during inference -//! -bool runInference( - const InferenceOptions& inference, InferenceEnvironment& iEnv, int32_t device, std::vector& trace); - -//! -//! \brief Get layer information of the engine. -//! -std::string getLayerInformation(const InferenceEnvironment& iEnv, nvinfer1::LayerInformationFormat format); - -} // namespace sample - -#endif - -#endif // TRT_SAMPLE_INFERENCE_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleOptions.cpp b/src/Detector/tensorrt_yolo/common_deprecated/sampleOptions.cpp deleted file mode 100644 index 0afd163f7..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleOptions.cpp +++ /dev/null @@ -1,1778 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "NvInfer.h" - -#include "logger.h" -#include "sampleOptions.h" - -namespace sample -{ - -namespace -{ - -std::vector splitToStringVec(const std::string& option, char separator) -{ - std::vector options; - - for (size_t start = 0; start < option.length();) - { - size_t separatorIndex = option.find(separator, start); - if (separatorIndex == std::string::npos) - { - separatorIndex = option.length(); - } - options.emplace_back(option.substr(start, separatorIndex - start)); - start = separatorIndex + 1; - } - - return options; -} - -template -T stringToValue(const std::string& option) -{ - return T{option}; -} - -template <> -int32_t stringToValue(const std::string& option) -{ - return std::stoi(option); -} - -template <> -float stringToValue(const std::string& option) -{ - return std::stof(option); -} - -template <> -double stringToValue(const std::string& option) -{ - return std::stod(option); -} - -template <> -bool stringToValue(const std::string& option) -{ - return true; -} - -template <> -std::vector stringToValue>(const std::string& option) -{ - std::vector shape; - std::vector dimsStrings = splitToStringVec(option, 'x'); - for (const auto& d : dimsStrings) - { - shape.push_back(stringToValue(d)); - } - return shape; -} - -template <> -nvinfer1::DataType stringToValue(const std::string& option) -{ - const std::unordered_map strToDT{{"fp32", nvinfer1::DataType::kFLOAT}, - {"fp16", nvinfer1::DataType::kHALF}, {"int8", nvinfer1::DataType::kINT8}, - {"int32", nvinfer1::DataType::kINT32}}; - const auto& dt = strToDT.find(option); - if (dt == strToDT.end()) - { - throw std::invalid_argument("Invalid DataType " + option); - } - return dt->second; -} - -template <> -nvinfer1::TensorFormats stringToValue(const std::string& option) -{ - std::vector optionStrings = splitToStringVec(option, '+'); - const std::unordered_map strToFmt{{"chw", nvinfer1::TensorFormat::kLINEAR}, - {"chw2", nvinfer1::TensorFormat::kCHW2}, {"chw4", nvinfer1::TensorFormat::kCHW4}, - {"hwc8", nvinfer1::TensorFormat::kHWC8}, {"chw16", nvinfer1::TensorFormat::kCHW16}, - {"chw32", nvinfer1::TensorFormat::kCHW32}, {"dhwc8", nvinfer1::TensorFormat::kDHWC8}, - {"hwc", nvinfer1::TensorFormat::kHWC}, {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR}, - {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}}; - nvinfer1::TensorFormats formats{}; - for (auto f : optionStrings) - { - const auto& tf = strToFmt.find(f); - if (tf == strToFmt.end()) - { - throw std::invalid_argument(std::string("Invalid TensorFormat ") + f); - } - formats |= 1U << static_cast(tf->second); - } - - return formats; -} - -template <> -IOFormat stringToValue(const std::string& option) -{ - IOFormat ioFormat{}; - const size_t colon = option.find(':'); - - if (colon == std::string::npos) - { - throw std::invalid_argument(std::string("Invalid IOFormat ") + option); - } - - ioFormat.first = stringToValue(option.substr(0, colon)); - ioFormat.second = stringToValue(option.substr(colon + 1)); - - return ioFormat; -} - -template -std::pair splitNameAndValue(const std::string& s) -{ - std::string tensorName; - std::string valueString; - // Split on the last : - std::vector nameRange{splitToStringVec(s, ':')}; - // Everything before the last : is the name - tensorName = nameRange[0]; - for (size_t i = 1; i < nameRange.size() - 1; i++) - { - tensorName += ":" + nameRange[i]; - } - // Value is the string element after the last : - valueString = nameRange[nameRange.size() - 1]; - return std::pair(tensorName, stringToValue(valueString)); -} - -template -void splitInsertKeyValue(const std::vector& kvList, T& map) -{ - for (const auto& kv : kvList) - { - map.insert(splitNameAndValue(kv)); - } -} - -const char* boolToEnabled(bool enable) -{ - return enable ? "Enabled" : "Disabled"; -} - -//! Check if input option exists in input arguments. -//! If it does: return its value, erase the argument and return true. -//! If it does not: return false. -template -bool getAndDelOption(Arguments& arguments, const std::string& option, T& value) -{ - const auto match = arguments.find(option); - if (match != arguments.end()) - { - value = stringToValue(match->second); - arguments.erase(match); - return true; - } - - return false; -} - -//! Check if input option exists in input arguments. -//! If it does: return false in value, erase the argument and return true. -//! If it does not: return false. -bool getAndDelNegOption(Arguments& arguments, const std::string& option, bool& value) -{ - bool dummy; - if (getAndDelOption(arguments, option, dummy)) - { - value = false; - return true; - } - return false; -} - -//! Check if input option exists in input arguments. -//! If it does: add all the matched arg values to values vector, erase the argument and return true. -//! If it does not: return false. -template -bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option, std::vector& values) -{ - const auto match = arguments.equal_range(option); - if (match.first == match.second) - { - return false; - } - - auto addToValues = [&values](Arguments::value_type& argValue) {values.emplace_back(stringToValue(argValue.second));}; - std::for_each(match.first, match.second, addToValues); - arguments.erase(match.first, match.second); - - return true; -} - -void insertShapesBuild(std::unordered_map& shapes, nvinfer1::OptProfileSelector selector, const std::string& name, const std::vector& dims) -{ - shapes[name][static_cast(selector)] = dims; -} - -void insertShapesInference(std::unordered_map>& shapes, const std::string& name, const std::vector& dims) -{ - shapes[name] = dims; -} - -std::string removeSingleQuotationMarks(std::string& str) -{ - std::vector strList{splitToStringVec(str, '\'')}; - // Remove all the escaped single quotation marks - std::string retVal = ""; - // Do not really care about unterminated sequences - for (size_t i = 0; i < strList.size(); i++) - { - retVal += strList[i]; - } - return retVal; -} - -void getLayerPrecisions(Arguments& arguments, char const* argument, LayerPrecisions& layerPrecisions) -{ - std::string list; - if (!getAndDelOption(arguments, argument, list)) - { - return; - } - - // The layerPrecisions flag contains comma-separated layerName:precision pairs. - std::vector precisionList{splitToStringVec(list, ',')}; - for (auto const& s : precisionList) - { - auto namePrecisionPair = splitNameAndValue(s); - auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first); - layerPrecisions[layerName] = namePrecisionPair.second; - } -} - -void getLayerOutputTypes(Arguments& arguments, char const* argument, LayerOutputTypes& layerOutputTypes) -{ - std::string list; - if (!getAndDelOption(arguments, argument, list)) - { - return; - } - - // The layerOutputTypes flag contains comma-separated layerName:types pairs. - std::vector precisionList{splitToStringVec(list, ',')}; - for (auto const& s : precisionList) - { - auto namePrecisionPair = splitNameAndValue(s); - auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first); - auto const typeStrings = splitToStringVec(namePrecisionPair.second, '+'); - std::vector typeVec(typeStrings.size(), nvinfer1::DataType::kFLOAT); - std::transform(typeStrings.begin(), typeStrings.end(), typeVec.begin(), stringToValue); - layerOutputTypes[layerName] = typeVec; - } -} - -bool getShapesBuild(Arguments& arguments, std::unordered_map& shapes, char const* argument, - nvinfer1::OptProfileSelector selector) -{ - std::string list; - bool retVal = getAndDelOption(arguments, argument, list); - std::vector shapeList{splitToStringVec(list, ',')}; - for (const auto& s : shapeList) - { - auto nameDimsPair = splitNameAndValue>(s); - auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); - auto dims = nameDimsPair.second; - insertShapesBuild(shapes, selector, tensorName, dims); - } - return retVal; -} - -bool getShapesInference(Arguments& arguments, std::unordered_map>& shapes, const char* argument) -{ - std::string list; - bool retVal = getAndDelOption(arguments, argument, list); - std::vector shapeList{splitToStringVec(list, ',')}; - for (const auto& s : shapeList) - { - auto nameDimsPair = splitNameAndValue>(s); - auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); - auto dims = nameDimsPair.second; - insertShapesInference(shapes, tensorName, dims); - } - return retVal; -} - -void processShapes(std::unordered_map& shapes, bool minShapes, bool optShapes, bool maxShapes, bool calib) -{ - // Only accept optShapes only or all three of minShapes, optShapes, maxShapes - if ( ((minShapes || maxShapes) && !optShapes) // minShapes only, maxShapes only, both minShapes and maxShapes - || (minShapes && !maxShapes && optShapes) // both minShapes and optShapes - || (!minShapes && maxShapes && optShapes)) // both maxShapes and optShapes - { - if (calib) - { - throw std::invalid_argument("Must specify only --optShapesCalib or all of --minShapesCalib, --optShapesCalib, --maxShapesCalib"); - } - else - { - throw std::invalid_argument("Must specify only --optShapes or all of --minShapes, --optShapes, --maxShapes"); - } - } - - // If optShapes only, expand optShapes to minShapes and maxShapes - if (optShapes && !minShapes && !maxShapes) - { - std::unordered_map newShapes; - for (auto& s : shapes) - { - insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kMIN, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kOPT, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - insertShapesBuild(newShapes, nvinfer1::OptProfileSelector::kMAX, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - } - shapes = newShapes; - } -} - -template -void printShapes(std::ostream& os, const char* phase, const T& shapes) -{ - if (shapes.empty()) - { - os << "Input " << phase << " shapes: model" << std::endl; - } - else - { - for (const auto& s : shapes) - { - os << "Input " << phase << " shape: " << s.first << "=" << s.second << std::endl; - } - } -} - -std::ostream& printBatch(std::ostream& os, int32_t maxBatch) -{ - if (maxBatch != maxBatchNotProvided) - { - os << maxBatch; - } - else - { - os << "explicit batch"; - } - return os; -} - -std::ostream& printTacticSources(std::ostream& os, nvinfer1::TacticSources enabledSources, nvinfer1::TacticSources disabledSources) -{ - if (!enabledSources && !disabledSources) - { - os << "Using default tactic sources"; - } - else - { - auto const addSource = [&](uint32_t source, std::string const& name) { - if (enabledSources & source) - { - os << name << " [ON], "; - } - else if (disabledSources & source) - { - os << name << " [OFF], "; - } - }; - - addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS), "cublas"); - addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS_LT), "cublasLt"); -#if (NV_TENSORRT_MAJOR > 7) - addSource(1U << static_cast(nvinfer1::TacticSource::kCUDNN), "cudnn"); -#endif - } - return os; -} - -std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) -{ - os << "FP32"; - if (options.fp16) - { - os << "+FP16"; - } - if (options.int8) - { - os << "+INT8"; - } - if (options.precisionConstraints == PrecisionConstraints::kOBEY) - { - os << " (obey precision constraints)"; - } - if (options.precisionConstraints == PrecisionConstraints::kPREFER) - { - os << " (prefer precision constraints)"; - } - return os; -} - -std::ostream& printTimingCache(std::ostream& os, BuildOptions const& options) -{ - switch (options.timingCacheMode) - { - case TimingCacheMode::kGLOBAL: os << "global"; break; - case TimingCacheMode::kLOCAL: os << "local"; break; - case TimingCacheMode::kDISABLE: os << "disable"; break; - } - return os; -} - -std::ostream& printSparsity(std::ostream& os, BuildOptions const& options) -{ - switch (options.sparsity) - { - case SparsityFlag::kDISABLE: os << "Disabled"; break; - case SparsityFlag::kENABLE: os << "Enabled"; break; - case SparsityFlag::kFORCE: os << "Forced"; break; - } - - return os; -} - -std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) -{ - auto const printValueOrDefault = [&os](double const val) { - if (val >= 0) - { - os << val << " MiB"; - } - else - { - os << "default"; - } - }; - os << "workspace: "; printValueOrDefault(options.workspace); os << ", "; - os << "dlaSRAM: "; printValueOrDefault(options.dlaSRAM); os << ", "; - os << "dlaLocalDRAM: "; printValueOrDefault(options.dlaLocalDRAM); os << ", "; - os << "dlaGlobalDRAM: "; printValueOrDefault(options.dlaGlobalDRAM); - return os; -} - -} // namespace - -Arguments argsToArgumentsMap(int32_t argc, char* argv[]) -{ - Arguments arguments; - for (int32_t i = 1; i < argc; ++i) - { - auto valuePtr = strchr(argv[i], '='); - if (valuePtr) - { - std::string value{valuePtr + 1}; - arguments.emplace(std::string(argv[i], valuePtr - argv[i]), value); - } - else - { - arguments.emplace(argv[i], ""); - } - } - return arguments; -} - -void BaseModelOptions::parse(Arguments& arguments) -{ - if (getAndDelOption(arguments, "--onnx", model)) - { - format = ModelFormat::kONNX; - } - else if (getAndDelOption(arguments, "--uff", model)) - { - format = ModelFormat::kUFF; - } - else if (getAndDelOption(arguments, "--model", model)) - { - format = ModelFormat::kCAFFE; - } -} - -void UffInput::parse(Arguments& arguments) -{ - getAndDelOption(arguments, "--uffNHWC", NHWC); - std::vector args; - if (getAndDelRepeatedOption(arguments, "--uffInput", args)) - { - for (const auto& i : args) - { - std::vector values{splitToStringVec(i, ',')}; - if (values.size() == 4) - { - nvinfer1::Dims3 dims{std::stoi(values[1]), std::stoi(values[2]), std::stoi(values[3])}; - inputs.emplace_back(values[0], dims); - } - else - { - throw std::invalid_argument(std::string("Invalid uffInput ") + i); - } - } - } -} - -void ModelOptions::parse(Arguments& arguments) -{ - baseModel.parse(arguments); - - switch (baseModel.format) - { - case ModelFormat::kCAFFE: - { - getAndDelOption(arguments, "--deploy", prototxt); - break; - } - case ModelFormat::kUFF: - { - uffInputs.parse(arguments); - if (uffInputs.inputs.empty()) - { - throw std::invalid_argument("Uff models require at least one input"); - } - break; - } - case ModelFormat::kONNX: - break; - case ModelFormat::kANY: - { - if (getAndDelOption(arguments, "--deploy", prototxt)) - { - baseModel.format = ModelFormat::kCAFFE; - } - break; - } - } - - // The --output flag should only be used with Caffe and UFF. It has no effect on ONNX. - std::vector outArgs; - if (getAndDelRepeatedOption(arguments, "--output", outArgs)) - { - for (const auto& o : outArgs) - { - for (auto& v : splitToStringVec(o, ',')) - { - outputs.emplace_back(std::move(v)); - } - } - } - if (baseModel.format == ModelFormat::kCAFFE || baseModel.format == ModelFormat::kUFF) - { - if (outputs.empty()) - { - throw std::invalid_argument("Caffe and Uff models require at least one output"); - } - } - else if (baseModel.format == ModelFormat::kONNX) - { - if (!outputs.empty()) - { - throw std::invalid_argument("The --output flag should not be used with ONNX models."); - } - } -} - -void BuildOptions::parse(Arguments& arguments) -{ - auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) { - std::string list; - getAndDelOption(arguments, argument, list); - std::vector formats{splitToStringVec(list, ',')}; - for (const auto& f : formats) - { - formatsVector.push_back(stringToValue(f)); - } - }; - - getFormats(inputFormats, "--inputIOFormats"); - getFormats(outputFormats, "--outputIOFormats"); - - bool addedExplicitBatchFlag{false}; - getAndDelOption(arguments, "--explicitBatch", addedExplicitBatchFlag); - if (addedExplicitBatchFlag) - { - sample::gLogWarning << "--explicitBatch flag has been deprecated and has no effect!" << std::endl; - sample::gLogWarning << "Explicit batch dim is automatically enabled if input model is ONNX or if dynamic " - << "shapes are provided when the engine is built." << std::endl; - } - - bool minShapes = getShapesBuild(arguments, shapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN); - bool optShapes = getShapesBuild(arguments, shapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT); - bool maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX); - processShapes(shapes, minShapes, optShapes, maxShapes, false); - bool minShapesCalib - = getShapesBuild(arguments, shapesCalib, "--minShapesCalib", nvinfer1::OptProfileSelector::kMIN); - bool optShapesCalib - = getShapesBuild(arguments, shapesCalib, "--optShapesCalib", nvinfer1::OptProfileSelector::kOPT); - bool maxShapesCalib - = getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", nvinfer1::OptProfileSelector::kMAX); - processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, true); - - bool addedExplicitPrecisionFlag{false}; - getAndDelOption(arguments, "--explicitPrecision", addedExplicitPrecisionFlag); - if (addedExplicitPrecisionFlag) - { - sample::gLogWarning << "--explicitPrecision flag has been deprecated and has no effect!" << std::endl; - } - - if (getAndDelOption(arguments, "--workspace", workspace)) - { - sample::gLogWarning << "--workspace flag has been deprecated by --memPoolSize flag." << std::endl; - } - - std::string memPoolSizes; - getAndDelOption(arguments, "--memPoolSize", memPoolSizes); - std::vector memPoolSpecs{splitToStringVec(memPoolSizes, ',')}; - for (auto const& memPoolSpec : memPoolSpecs) - { - std::string memPoolName; - double memPoolSize; - std::tie(memPoolName, memPoolSize) = splitNameAndValue(memPoolSpec); - if (memPoolSize < 0) - { - throw std::invalid_argument(std::string("Negative memory pool size: ") + std::to_string(memPoolSize)); - } - if (memPoolName == "workspace") - { - workspace = memPoolSize; - } - else if (memPoolName == "dlaSRAM") - { - dlaSRAM = memPoolSize; - } - else if (memPoolName == "dlaLocalDRAM") - { - dlaLocalDRAM = memPoolSize; - } - else if (memPoolName == "dlaGlobalDRAM") - { - dlaGlobalDRAM = memPoolSize; - } - else if (!memPoolName.empty()) - { - throw std::invalid_argument(std::string("Unknown memory pool: ") + memPoolName); - } - } - - getAndDelOption(arguments, "--maxBatch", maxBatch); - getAndDelOption(arguments, "--minTiming", minTiming); - getAndDelOption(arguments, "--avgTiming", avgTiming); - - bool best{false}; - getAndDelOption(arguments, "--best", best); - if (best) - { - int8 = true; - fp16 = true; - } - - getAndDelOption(arguments, "--refit", refittable); - getAndDelNegOption(arguments, "--noTF32", tf32); - getAndDelOption(arguments, "--fp16", fp16); - getAndDelOption(arguments, "--int8", int8); - getAndDelOption(arguments, "--safe", safe); - getAndDelOption(arguments, "--consistency", consistency); - getAndDelOption(arguments, "--restricted", restricted); - - getAndDelOption(arguments, "--directIO", directIO); - - std::string precisionConstraintsString; - getAndDelOption(arguments, "--precisionConstraints", precisionConstraintsString); - if (!precisionConstraintsString.empty()) - { - const std::unordered_map precisionConstraintsMap - = {{"obey", PrecisionConstraints::kOBEY}, {"prefer", PrecisionConstraints::kPREFER}, - {"none", PrecisionConstraints::kNONE}}; - auto it = precisionConstraintsMap.find(precisionConstraintsString); - if (it == precisionConstraintsMap.end()) - { - throw std::invalid_argument(std::string("Unknown precision constraints: ") + precisionConstraintsString); - } - precisionConstraints = it->second; - } - else - { - precisionConstraints = PrecisionConstraints::kNONE; - } - - getLayerPrecisions(arguments, "--layerPrecisions", layerPrecisions); - getLayerOutputTypes(arguments, "--layerOutputTypes", layerOutputTypes); - - if (layerPrecisions.empty() && layerOutputTypes.empty() && precisionConstraints != PrecisionConstraints::kNONE) - { - sample::gLogWarning << "When --precisionConstraints flag is set to \"obey\" or \"prefer\", please add " - << "--layerPrecision/--layerOutputTypes flags to set layer-wise precisions and output " - << "types." << std::endl; - } - else if ((!layerPrecisions.empty() || !layerOutputTypes.empty()) - && precisionConstraints == PrecisionConstraints::kNONE) - { - sample::gLogWarning << "--layerPrecision/--layerOutputTypes flags have no effect when --precisionConstraints " - << "flag is set to \"none\"." << std::endl; - } - - std::string sparsityString; - getAndDelOption(arguments, "--sparsity", sparsityString); - if (sparsityString == "disable") - { - sparsity = SparsityFlag::kDISABLE; - } - else if (sparsityString == "enable") - { - sparsity = SparsityFlag::kENABLE; - } - else if (sparsityString == "force") - { - sparsity = SparsityFlag::kFORCE; - } - else if (!sparsityString.empty()) - { - throw std::invalid_argument(std::string("Unknown sparsity mode: ") + sparsityString); - } - - bool calibCheck = getAndDelOption(arguments, "--calib", calibration); - if (int8 && calibCheck && !shapes.empty() && shapesCalib.empty()) - { - shapesCalib = shapes; - } - - std::string profilingVerbosityString; - if (getAndDelOption(arguments, "--nvtxMode", profilingVerbosityString)) - { - sample::gLogWarning << "--nvtxMode flag has been deprecated by --profilingVerbosity flag." << std::endl; - } - - getAndDelOption(arguments, "--profilingVerbosity", profilingVerbosityString); - if (profilingVerbosityString == "layer_names_only") - { -#if (NV_TENSORRT_MAJOR > 7) - profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; -#else - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDEFAULT; -#endif - } - else if (profilingVerbosityString == "none") - { - profilingVerbosity = nvinfer1::ProfilingVerbosity::kNONE; - } -#if (NV_TENSORRT_MAJOR > 7) - else if (profilingVerbosityString == "detailed") - { - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; - } -#endif - else if (profilingVerbosityString == "default") - { -#if (NV_TENSORRT_MAJOR > 7) - sample::gLogWarning << "--profilingVerbosity=default has been deprecated by " - "--profilingVerbosity=layer_names_only." - << std::endl; - profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; -#else - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDEFAULT; -#endif - } - else if (profilingVerbosityString == "verbose") - { -#if (NV_TENSORRT_MAJOR > 7) - sample::gLogWarning << "--profilingVerbosity=verbose has been deprecated by --profilingVerbosity=detailed." - << std::endl; - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; -#else - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDEFAULT; -#endif - } - else if (!profilingVerbosityString.empty()) - { - throw std::invalid_argument(std::string("Unknown profilingVerbosity: ") + profilingVerbosityString); - } - - if (getAndDelOption(arguments, "--loadEngine", engine)) - { - load = true; - } - if (getAndDelOption(arguments, "--saveEngine", engine)) - { - save = true; - } - if (load && save) - { - throw std::invalid_argument("Incompatible load and save engine options selected"); - } - - std::string tacticSourceArgs; - if (getAndDelOption(arguments, "--tacticSources", tacticSourceArgs)) - { - std::vector tacticList = splitToStringVec(tacticSourceArgs, ','); - for (auto& t : tacticList) - { - bool enable{false}; - if (t.front() == '+') - { - enable = true; - } - else if (t.front() != '-') - { - throw std::invalid_argument( - "Tactic source must be prefixed with + or -, indicating whether it should be enabled or disabled " - "respectively."); - } - t.erase(0, 1); - - const auto toUpper = [](std::string& sourceName) { - std::transform( - sourceName.begin(), sourceName.end(), sourceName.begin(), [](char c) { return std::toupper(c); }); - return sourceName; - }; - - nvinfer1::TacticSource source{}; - t = toUpper(t); - if (t == "CUBLAS") - { - source = nvinfer1::TacticSource::kCUBLAS; - } - else if (t == "CUBLASLT" || t == "CUBLAS_LT") - { - source = nvinfer1::TacticSource::kCUBLAS_LT; - } -#if (NV_TENSORRT_MAJOR > 7) - else if (t == "CUDNN") - { - source = nvinfer1::TacticSource::kCUDNN; - } -#endif - else - { - throw std::invalid_argument(std::string("Unknown tactic source: ") + t); - } - - uint32_t sourceBit = 1U << static_cast(source); - - if (enable) - { - enabledTactics |= sourceBit; - } - else - { - disabledTactics |= sourceBit; - } - - if (enabledTactics & disabledTactics) - { - throw std::invalid_argument(std::string("Cannot enable and disable ") + t); - } - } - } - - bool noBuilderCache{false}; - getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); - getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); - if (noBuilderCache) - { - timingCacheMode = TimingCacheMode::kDISABLE; - } - else if (!timingCacheFile.empty()) - { - timingCacheMode = TimingCacheMode::kGLOBAL; - } - else - { - timingCacheMode = TimingCacheMode::kLOCAL; - } -} - -void SystemOptions::parse(Arguments& arguments) -{ - getAndDelOption(arguments, "--device", device); - getAndDelOption(arguments, "--useDLACore", DLACore); - getAndDelOption(arguments, "--allowGPUFallback", fallback); - std::string pluginName; - while (getAndDelOption(arguments, "--plugins", pluginName)) - { - plugins.emplace_back(pluginName); - } -} - -void InferenceOptions::parse(Arguments& arguments) -{ - getAndDelOption(arguments, "--streams", streams); - getAndDelOption(arguments, "--iterations", iterations); - getAndDelOption(arguments, "--duration", duration); - getAndDelOption(arguments, "--warmUp", warmup); - getAndDelOption(arguments, "--sleepTime", sleep); - getAndDelOption(arguments, "--idleTime", idle); - bool exposeDMA{false}; - if (getAndDelOption(arguments, "--exposeDMA", exposeDMA)) - { - overlap = !exposeDMA; - } - getAndDelOption(arguments, "--noDataTransfers", skipTransfers); - getAndDelOption(arguments, "--useManagedMemory", useManaged); - getAndDelOption(arguments, "--useSpinWait", spin); - getAndDelOption(arguments, "--threads", threads); - getAndDelOption(arguments, "--useCudaGraph", graph); - getAndDelOption(arguments, "--separateProfileRun", rerun); - getAndDelOption(arguments, "--buildOnly", skip); - getAndDelOption(arguments, "--timeDeserialize", timeDeserialize); - getAndDelOption(arguments, "--timeRefit", timeRefit); - - std::string list; - getAndDelOption(arguments, "--loadInputs", list); - std::vector inputsList{splitToStringVec(list, ',')}; - splitInsertKeyValue(inputsList, inputs); - - getShapesInference(arguments, shapes, "--shapes"); - getAndDelOption(arguments, "--batch", batch); -} - -void ReportingOptions::parse(Arguments& arguments) -{ - getAndDelOption(arguments, "--percentile", percentile); - getAndDelOption(arguments, "--avgRuns", avgs); - getAndDelOption(arguments, "--verbose", verbose); - getAndDelOption(arguments, "--dumpRefit", refit); - getAndDelOption(arguments, "--dumpOutput", output); - getAndDelOption(arguments, "--dumpProfile", profile); - getAndDelOption(arguments, "--dumpLayerInfo", layerInfo); - getAndDelOption(arguments, "--exportTimes", exportTimes); - getAndDelOption(arguments, "--exportOutput", exportOutput); - getAndDelOption(arguments, "--exportProfile", exportProfile); - getAndDelOption(arguments, "--exportLayerInfo", exportLayerInfo); - if (percentile < 0 || percentile > 100) - { - throw std::invalid_argument(std::string("Percentile ") + std::to_string(percentile) + "is not in [0,100]"); - } -} - -bool parseHelp(Arguments& arguments) -{ - bool helpLong{false}; - bool helpShort{false}; - getAndDelOption(arguments, "--help", helpLong); - getAndDelOption(arguments, "-h", helpShort); - return helpLong || helpShort; -} - -void AllOptions::parse(Arguments& arguments) -{ - model.parse(arguments); - build.parse(arguments); - system.parse(arguments); - inference.parse(arguments); - - // Use explicitBatch when input model is ONNX or when dynamic shapes are used. - const bool isOnnx{model.baseModel.format == ModelFormat::kONNX}; - const bool hasDynamicShapes{!build.shapes.empty() || !inference.shapes.empty()}; - const bool detectedExplicitBatch = isOnnx || hasDynamicShapes; - - // Throw an error if user tries to use --batch or --maxBatch when the engine has explicit batch dim. - const bool maxBatchWasSet{build.maxBatch != maxBatchNotProvided}; - const bool batchWasSet{inference.batch != batchNotProvided}; - if (detectedExplicitBatch && (maxBatchWasSet || batchWasSet)) - { - throw std::invalid_argument( - "The --batch and --maxBatch flags should not be used when the input model is ONNX or when dynamic shapes " - "are provided. Please use --optShapes and --shapes to set input shapes instead."); - } - - // If batch and/or maxBatch is not set and the engine has implicit batch dim, set them to default values. - if (!detectedExplicitBatch) - { - // If batch is not set, set it to default value. - if (!batchWasSet) - { - inference.batch = defaultBatch; - } - // If maxBatch is not set, set it to be equal to batch. - if (!maxBatchWasSet) - { - build.maxBatch = inference.batch; - } - // MaxBatch should not be less than batch. - if (build.maxBatch < inference.batch) - { - throw std::invalid_argument("Build max batch " + std::to_string(build.maxBatch) - + " is less than inference batch " + std::to_string(inference.batch)); - } - } - - if (build.shapes.empty() && !inference.shapes.empty()) - { - // If --shapes are provided but --optShapes are not, assume that optShapes is the same as shapes. - for (auto& s : inference.shapes) - { - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMIN, s.first, s.second); - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kOPT, s.first, s.second); - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMAX, s.first, s.second); - } - } - else if (!build.shapes.empty() && inference.shapes.empty()) - { - // If --optShapes are provided but --shapes are not, assume that shapes is the same as optShapes. - for (auto& s : build.shapes) - { - insertShapesInference( - inference.shapes, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - } - } - - reporting.parse(arguments); - helps = parseHelp(arguments); - - if (!helps) - { - if (!build.load && model.baseModel.format == ModelFormat::kANY) - { - throw std::invalid_argument("Model missing or format not recognized"); - } - if (build.safe && system.DLACore >= 0) - { - auto checkSafeDLAFormats = [](std::vector const& fmt) { - return fmt.empty() ? false : std::all_of(fmt.begin(), fmt.end(), [](IOFormat const& pair) { - bool supported{false}; - bool const isLINEAR{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kLINEAR)}; - bool const isCHW4{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW4)}; - bool const isCHW32{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW32)}; - bool const isCHW16{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW16)}; - supported |= pair.first == nvinfer1::DataType::kINT8 && (isLINEAR || isCHW4 || isCHW32); - supported |= pair.first == nvinfer1::DataType::kHALF && (isLINEAR || isCHW4 || isCHW16); - return supported; - }); - }; - if (!checkSafeDLAFormats(build.inputFormats) || !checkSafeDLAFormats(build.outputFormats)) - { - throw std::invalid_argument( - "I/O formats for safe DLA capability are restricted to fp16/int8:linear, fp16:chw16 or int8:chw32"); - } - if (system.fallback) - { - throw std::invalid_argument("GPU fallback (--allowGPUFallback) not allowed for safe DLA capability"); - } - } - } -} - -void SafeBuilderOptions::parse(Arguments& arguments) -{ - auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) { - std::string list; - getAndDelOption(arguments, argument, list); - std::vector formats{splitToStringVec(list, ',')}; - for (const auto& f : formats) - { - formatsVector.push_back(stringToValue(f)); - } - }; - - getAndDelOption(arguments, "--serialized", serialized); - getAndDelOption(arguments, "--onnx", onnxModelFile); - getAndDelOption(arguments, "--help", help); - getAndDelOption(arguments, "-h", help); - getAndDelOption(arguments, "--verbose", verbose); - getAndDelOption(arguments, "-v", verbose); - getFormats(inputFormats, "--inputIOFormats"); - getFormats(outputFormats, "--outputIOFormats"); - getAndDelOption(arguments, "--int8", int8); - getAndDelOption(arguments, "--calib", calibFile); - getAndDelOption(arguments, "--consistency", consistency); - getAndDelOption(arguments, "--std", standard); - std::string pluginName; - while (getAndDelOption(arguments, "--plugins", pluginName)) - { - plugins.emplace_back(pluginName); - } -} - -std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) -{ - os << "=== Model Options ===" << std::endl; - - os << "Format: "; - switch (options.format) - { - case ModelFormat::kCAFFE: - { - os << "Caffe"; - break; - } - case ModelFormat::kONNX: - { - os << "ONNX"; - break; - } - case ModelFormat::kUFF: - { - os << "UFF"; - break; - } - case ModelFormat::kANY: - os << "*"; - break; - } - os << std::endl << "Model: " << options.model << std::endl; - - return os; -} - -std::ostream& operator<<(std::ostream& os, const UffInput& input) -{ - os << "Uff Inputs Layout: " << (input.NHWC ? "NHWC" : "NCHW") << std::endl; - for (const auto& i : input.inputs) - { - os << "Input: " << i.first << "," << i.second.d[0] << "," << i.second.d[1] << "," << i.second.d[2] << std::endl; - } - - return os; -} - -std::ostream& operator<<(std::ostream& os, const ModelOptions& options) -{ - os << options.baseModel; - switch (options.baseModel.format) - { - case ModelFormat::kCAFFE: - { - os << "Prototxt: " << options.prototxt << std::endl; - break; - } - case ModelFormat::kUFF: - { - os << options.uffInputs; - break; - } - case ModelFormat::kONNX: // Fallthrough: No options to report for ONNX or the generic case - case ModelFormat::kANY: - break; - } - - os << "Output:"; - for (const auto& o : options.outputs) - { - os << " " << o; - } - os << std::endl; - - return os; -} - -std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) -{ - switch (dtype) - { - case nvinfer1::DataType::kFLOAT: - { - os << "fp32"; - break; - } - case nvinfer1::DataType::kHALF: - { - os << "fp16"; - break; - } - case nvinfer1::DataType::kINT8: - { - os << "int8"; - break; - } - case nvinfer1::DataType::kINT32: - { - os << "int32"; - break; - } - case nvinfer1::DataType::kBOOL: - { - os << "bool"; - break; - } - } - return os; -} - -std::ostream& operator<<(std::ostream& os, IOFormat const& format) -{ - os << format.first << ":"; - - for (int32_t f = 0; f < nvinfer1::EnumMax(); ++f) - { - if ((1U << f) & format.second) - { - if (f) - { - os << "+"; - } - switch (nvinfer1::TensorFormat(f)) - { - case nvinfer1::TensorFormat::kLINEAR: - { - os << "chw"; - break; - } - case nvinfer1::TensorFormat::kCHW2: - { - os << "chw2"; - break; - } - case nvinfer1::TensorFormat::kHWC8: - { - os << "hwc8"; - break; - } -#if (NV_TENSORRT_MAJOR > 7) - case nvinfer1::TensorFormat::kHWC16: - { - os << "hwc16"; - break; - } -#endif - case nvinfer1::TensorFormat::kCHW4: - { - os << "chw4"; - break; - } - case nvinfer1::TensorFormat::kCHW16: - { - os << "chw16"; - break; - } - case nvinfer1::TensorFormat::kCHW32: - { - os << "chw32"; - break; - } - case nvinfer1::TensorFormat::kDHWC8: - { - os << "dhwc8"; - break; - } - case nvinfer1::TensorFormat::kCDHW32: - { - os << "cdhw32"; - break; - } - case nvinfer1::TensorFormat::kHWC: - { - os << "hwc"; - break; - } - case nvinfer1::TensorFormat::kDLA_LINEAR: - { - os << "dla_linear"; - break; - } - case nvinfer1::TensorFormat::kDLA_HWC4: - { - os << "dla_hwc4"; - break; - } - } - } - } - return os; -} - -std::ostream& operator<<(std::ostream& os, const ShapeRange& dims) -{ - int32_t i = 0; - for (const auto& d : dims) - { - if (!d.size()) - { - break; - } - os << (i ? "+" : "") << d; - ++i; - } - return os; -} - -std::ostream& operator<<(std::ostream& os, LayerPrecisions const& layerPrecisions) -{ - int32_t i = 0; - for (auto const& layerPrecision : layerPrecisions) - { - os << (i ? "," : "") << layerPrecision.first << ":" << layerPrecision.second; - ++i; - } - return os; -} - -std::ostream& operator<<(std::ostream& os, const BuildOptions& options) -{ - // clang-format off - os << "=== Build Options ===" << std::endl << - - "Max batch: "; printBatch(os, options.maxBatch) << std::endl << - "Memory Pools: "; printMemoryPools(os, options) << std::endl << - "minTiming: " << options.minTiming << std::endl << - "avgTiming: " << options.avgTiming << std::endl << - "Precision: "; printPrecision(os, options) << std::endl << - "LayerPrecisions: " << options.layerPrecisions << std::endl << - "Calibration: " << (options.int8 && options.calibration.empty() ? "Dynamic" : options.calibration.c_str()) << std::endl << - "Refit: " << boolToEnabled(options.refittable) << std::endl << - "Sparsity: "; printSparsity(os, options) << std::endl << - "Safe mode: " << boolToEnabled(options.safe) << std::endl << - "DirectIO mode: " << boolToEnabled(options.directIO) << std::endl << - "Restricted mode: " << boolToEnabled(options.restricted) << std::endl << - "Save engine: " << (options.save ? options.engine : "") << std::endl << - "Load engine: " << (options.load ? options.engine : "") << std::endl << - "Profiling verbosity: " << static_cast(options.profilingVerbosity) << std::endl << - "Tactic sources: "; printTacticSources(os, options.enabledTactics, options.disabledTactics) << std::endl << - "timingCacheMode: "; printTimingCache(os, options) << std::endl << - "timingCacheFile: " << options.timingCacheFile << std::endl; - // clang-format on - - auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) { - if (formats.empty()) - { - os << direction << "s format: fp32:CHW" << std::endl; - } - else - { - for(const auto& f : formats) - { - os << direction << ": " << f << std::endl; - } - } - }; - - printIOFormats(os, "Input(s)", options.inputFormats); - printIOFormats(os, "Output(s)", options.outputFormats); - printShapes(os, "build", options.shapes); - printShapes(os, "calibration", options.shapesCalib); - - return os; -} - -std::ostream& operator<<(std::ostream& os, const SystemOptions& options) -{ - // clang-format off - os << "=== System Options ===" << std::endl << - - "Device: " << options.device << std::endl << - "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "") << - (options.DLACore != -1 && options.fallback ? "(With GPU fallback)" : "") << std::endl; - os << "Plugins:"; - - for (const auto& p : options.plugins) - { - os << " " << p; - } - os << std::endl; - - return os; - // clang-format on -} - -std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) -{ -// clang-format off - os << "=== Inference Options ===" << std::endl << - - "Batch: "; - if (options.batch && options.shapes.empty()) - { - os << options.batch << std::endl; - } - else - { - os << "Explicit" << std::endl; - } - printShapes(os, "inference", options.shapes); - os << "Iterations: " << options.iterations << std::endl << - "Duration: " << options.duration << "s (+ " - << options.warmup << "ms warm up)" << std::endl << - "Sleep time: " << options.sleep << "ms" << std::endl << - "Idle time: " << options.idle << "ms" << std::endl << - "Streams: " << options.streams << std::endl << - "ExposeDMA: " << boolToEnabled(!options.overlap) << std::endl << - "Data transfers: " << boolToEnabled(!options.skipTransfers) << std::endl << - "Spin-wait: " << boolToEnabled(options.spin) << std::endl << - "Multithreading: " << boolToEnabled(options.threads) << std::endl << - "CUDA Graph: " << boolToEnabled(options.graph) << std::endl << - "Separate profiling: " << boolToEnabled(options.rerun) << std::endl << - "Time Deserialize: " << boolToEnabled(options.timeDeserialize) << std::endl << - "Time Refit: " << boolToEnabled(options.timeRefit) << std::endl << - "Skip inference: " << boolToEnabled(options.skip) << std::endl; - -// clang-format on - os << "Inputs:" << std::endl; - for (const auto& input : options.inputs) - { - os << input.first << "<-" << input.second << std::endl; - } - - return os; -} - -std::ostream& operator<<(std::ostream& os, const ReportingOptions& options) -{ -// clang-format off - os << "=== Reporting Options ===" << std::endl << - - "Verbose: " << boolToEnabled(options.verbose) << std::endl << - "Averages: " << options.avgs << " inferences" << std::endl << - "Percentile: " << options.percentile << std::endl << - "Dump refittable layers:" << boolToEnabled(options.refit) << std::endl << - "Dump output: " << boolToEnabled(options.output) << std::endl << - "Profile: " << boolToEnabled(options.profile) << std::endl << - "Export timing to JSON file: " << options.exportTimes << std::endl << - "Export output to JSON file: " << options.exportOutput << std::endl << - "Export profile to JSON file: " << options.exportProfile << std::endl; -// clang-format on - - return os; -} - -std::ostream& operator<<(std::ostream& os, const AllOptions& options) -{ - os << options.model << options.build << options.system << options.inference << options.reporting << std::endl; - return os; -} - -std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) -{ - auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) { - if (formats.empty()) - { - os << direction << "s format: fp32:CHW" << std::endl; - } - else - { - for(const auto& f : formats) - { - os << direction << ": " << f << std::endl; - } - } - }; - - os << "=== Build Options ===" << std::endl; - os << "Model ONNX: " << options.onnxModelFile << std::endl; - - os << "Precision: FP16"; - if (options.int8) - { - os << " + INT8"; - } - os << std::endl; - os << "Calibration file: " << options.calibFile << std::endl; - os << "Serialized Network: " << options.serialized << std::endl; - - printIOFormats(os, "Input(s)", options.inputFormats); - printIOFormats(os, "Output(s)", options.outputFormats); - - os << "Plugins:"; - for (const auto& p : options.plugins) - { - os << " " << p; - } - os << std::endl; - return os; -} - -void BaseModelOptions::help(std::ostream& os) -{ -// clang-format off - os << " --uff= UFF model" << std::endl << - " --onnx= ONNX model" << std::endl << - " --model= Caffe model (default = no model, random weights used)" << std::endl; -// clang-format on -} - -void UffInput::help(std::ostream& os) -{ -// clang-format off - os << " --uffInput=,X,Y,Z Input blob name and its dimensions (X,Y,Z=C,H,W), it can be specified " - "multiple times; at least one is required for UFF models" << std::endl << - " --uffNHWC Set if inputs are in the NHWC layout instead of NCHW (use " << - "X,Y,Z=H,W,C order in --uffInput)" << std::endl; -// clang-format on -} - -void ModelOptions::help(std::ostream& os) -{ -// clang-format off - os << "=== Model Options ===" << std::endl; - BaseModelOptions::help(os); - os << " --deploy= Caffe prototxt file" << std::endl << - " --output=[,]* Output names (it can be specified multiple times); at least one output " - "is required for UFF and Caffe" << std::endl; - UffInput::help(os); -// clang-format on -} - -void BuildOptions::help(std::ostream& os) -{ -// clang-format off - os << "=== Build Options ===" "\n" - " --maxBatch Set max batch size and build an implicit batch engine (default = same size as --batch)" "\n" - " This option should not be used when the input model is ONNX or when dynamic shapes are provided." "\n" - " --minShapes=spec Build with dynamic shapes using a profile with the min shapes provided" "\n" - " --optShapes=spec Build with dynamic shapes using a profile with the opt shapes provided" "\n" - " --maxShapes=spec Build with dynamic shapes using a profile with the max shapes provided" "\n" - " --minShapesCalib=spec Calibrate with dynamic shapes using a profile with the min shapes provided" "\n" - " --optShapesCalib=spec Calibrate with dynamic shapes using a profile with the opt shapes provided" "\n" - " --maxShapesCalib=spec Calibrate with dynamic shapes using a profile with the max shapes provided" "\n" - " Note: All three of min, opt and max shapes must be supplied." "\n" - " However, if only opt shapes is supplied then it will be expanded so" "\n" - " that min shapes and max shapes are set to the same values as opt shapes." "\n" - " Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." "\n" - " Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128" "\n" - " Each input shape is supplied as a key-value pair where key is the input name and" "\n" - " value is the dimensions (including the batch dimension) to be used for that input." "\n" - " Each key-value pair has the key and value separated using a colon (:)." "\n" - " Multiple input shapes can be provided via comma-separated key-value pairs." "\n" - " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" "\n" - " See --outputIOFormats help for the grammar of type and format list." "\n" - " Note: If this option is specified, please set comma-separated types and formats for all" "\n" - " inputs following the same order as network inputs ID (even if only one input" "\n" - " needs specifying IO format) or set the type and format once for broadcasting." "\n" - " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" "\n" - " Note: If this option is specified, please set comma-separated types and formats for all" "\n" - " outputs following the same order as network outputs ID (even if only one output" "\n" - " needs specifying IO format) or set the type and format once for broadcasting." "\n" - " IO Formats: spec ::= IOfmt[\",\"spec]" "\n" - " IOfmt ::= type:fmt" "\n" - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" "\n" - " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" "\n" - " --workspace=N Set workspace size in MiB." "\n" - " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s) in MiB." "\n" - " Note: Also accepts decimal sizes, e.g. 0.25MiB. Will be rounded down to the nearest integer bytes." "\n" - " Pool constraint: poolspec ::= poolfmt[\",\"poolspec]" "\n" - " poolfmt ::= pool:sizeInMiB" "\n" - " pool ::= \"workspace\"|\"dlaSRAM\"|\"dlaLocalDRAM\"|\"dlaGlobalDRAM\"" "\n" - " --profilingVerbosity=mode Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)" "\n" - " --minTiming=M Set the minimum number of iterations used in kernel selection (default = " - << defaultMinTiming << ")" "\n" - " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " - << defaultAvgTiming << ")" "\n" - " --refit Mark the engine as refittable. This will allow the inspection of refittable layers " "\n" - " and weights within the engine." "\n" - " --sparsity=spec Control sparsity (default = disabled). " "\n" - " Sparsity: spec ::= \"disable\", \"enable\", \"force\"" "\n" - " Note: Description about each of these options is as below" "\n" - " disable = do not enable sparse tactics in the builder (this is the default)" "\n" - " enable = enable sparse tactics in the builder (but these tactics will only be" "\n" - " considered if the weights have the right sparsity pattern)" "\n" - " force = enable sparse tactics in the builder and force-overwrite the weights to have" "\n" - " a sparsity pattern (even if you loaded a model yourself)" "\n" - " --noTF32 Disable tf32 precision (default is to enable tf32, in addition to fp32)" "\n" - " --fp16 Enable fp16 precision, in addition to fp32 (default = disabled)" "\n" - " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" "\n" - " --best Enable all precisions to achieve the best performance (default = disabled)" "\n" - " --directIO Avoid reformatting at network boundaries. (default = disabled)" "\n" - " --precisionConstraints=spec Control precision constraint setting. (default = none)" "\n" - " Precision Constaints: spec ::= \"none\" | \"obey\" | \"prefer\"" "\n" - " none = no constraints" "\n" - " prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible" "\n" - " obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail" "\n" - " otherwise" "\n" - " --layerPrecisions=spec Control per-layer precision constraints. Effective only when precisionConstraints is set to" "\n" - " \"obey\" or \"prefer\". (default = none)" "\n" - " The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a" "\n" - " layerName to specify the default precision for all the unspecified layers." "\n" - " Per-layer precision spec ::= layerPrecision[\",\"spec]" "\n" - " layerPrecision ::= layerName\":\"precision" "\n" - " precision ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" "\n" - " --layerOutputTypes=spec Control per-layer output type constraints. Effective only when precisionConstraints is set to" "\n" - " \"obey\" or \"prefer\". (default = none)" "\n" - " The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a" "\n" - " layerName to specify the default precision for all the unspecified layers. If a layer has more than""\n" - " one output, then multiple types separated by \"+\" can be provided for this layer." "\n" - " Per-layer output type spec ::= layerOutputTypes[\",\"spec]" "\n" - " layerOutputTypes ::= layerName\":\"type" "\n" - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"[\"+\"type]" "\n" - " --calib= Read INT8 calibration cache file" "\n" - " --safe Enable build safety certified engine" "\n" - " --consistency Perform consistency checking on safety certified engine" "\n" - " --restricted Enable safety scope checking with kSAFETY_SCOPE build flag" "\n" - " --saveEngine= Save the serialized engine" "\n" - " --loadEngine= Load a serialized engine" "\n" - " --tacticSources=tactics Specify the tactics to be used by adding (+) or removing (-) tactics from the default " "\n" - " tactic sources (default = all available tactics)." "\n" - " Note: Currently only cuDNN, cuBLAS and cuBLAS-LT are listed as optional tactics." "\n" - " Tactic Sources: tactics ::= [\",\"tactic]" "\n" - " tactic ::= (+|-)lib" "\n" - " lib ::= \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\"" "\n" - " For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS" "\n" - " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" "\n" - " --timingCacheFile= Save/load the serialized global timing cache" "\n" - ; -// clang-format on - os << std::flush; -} - -void SystemOptions::help(std::ostream& os) -{ -// clang-format off - os << "=== System Options ===" << std::endl << - " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl << - " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl << - " --allowGPUFallback When DLA is enabled, allow GPU fallback for unsupported layers " - "(default = disabled)" << std::endl; - os << " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl; -// clang-format on -} - -void InferenceOptions::help(std::ostream& os) -{ - // clang-format off - os << "=== Inference Options ===" << std::endl << - " --batch=N Set batch size for implicit batch engines (default = " << defaultBatch << ")" << std::endl << - " This option should not be used when the engine is built from an ONNX model or when dynamic" << std::endl << - " shapes are provided when the engine is built." << std::endl << - " --shapes=spec Set input shapes for dynamic shapes inference inputs." << std::endl << - " Note: Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." << std::endl << - " Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128" << std::endl << - " Each input shape is supplied as a key-value pair where key is the input name and" << std::endl << - " value is the dimensions (including the batch dimension) to be used for that input." << std::endl << - " Each key-value pair has the key and value separated using a colon (:)." << std::endl << - " Multiple input shapes can be provided via comma-separated key-value pairs." << std::endl << - " --loadInputs=spec Load input values from files (default = generate random inputs). Input names can be " - "wrapped with single quotes (ex: 'Input:0')" << std::endl << - " Input values spec ::= Ival[\",\"spec]" << std::endl << - " Ival ::= name\":\"file" << std::endl << - " --iterations=N Run at least N inference iterations (default = " << defaultIterations << ")" << std::endl << - " --warmUp=N Run for N milliseconds to warmup before measuring performance (default = " - << defaultWarmUp << ")" << std::endl << - " --duration=N Run performance measurements for at least N seconds wallclock time (default = " - << defaultDuration << ")" << std::endl << - " --sleepTime=N Delay inference start with a gap of N milliseconds between launch and compute " - "(default = " << defaultSleep << ")" << std::endl << - " --idleTime=N Sleep N milliseconds between two continuous iterations" - "(default = " << defaultIdle << ")" << std::endl << - " --streams=N Instantiate N engines to use concurrently (default = " << defaultStreams << ")" << std::endl << - " --exposeDMA Serialize DMA transfers to and from device (default = disabled)." << std::endl << - " --noDataTransfers Disable DMA transfers to and from device (default = enabled)." << std::endl << - " --useManagedMemory Use managed memory instead of seperate host and device allocations (default = disabled)." << std::endl << - " --useSpinWait Actively synchronize on GPU events. This option may decrease synchronization time but " - "increase CPU usage and power (default = disabled)" << std::endl << - " --threads Enable multithreading to drive engines with independent threads" - " or speed up refitting (default = disabled) " << std::endl << - " --useCudaGraph Use CUDA graph to capture engine execution and then launch inference (default = disabled)." << std::endl << - " This flag may be ignored if the graph capture fails." << std::endl << - " --timeDeserialize Time the amount of time it takes to deserialize the network and exit." << std::endl << - " --timeRefit Time the amount of time it takes to refit the engine before inference." << std::endl << - " --separateProfileRun Do not attach the profiler in the benchmark run; if profiling is enabled, a second " - "profile run will be executed (default = disabled)" << std::endl << - " --buildOnly Skip inference perf measurement (default = disabled)" << std::endl; - // clang-format on -} - -void ReportingOptions::help(std::ostream& os) -{ -// clang-format off - os << "=== Reporting Options ===" << std::endl << - " --verbose Use verbose logging (default = false)" << std::endl << - " --avgRuns=N Report performance measurements averaged over N consecutive " - "iterations (default = " << defaultAvgRuns << ")" << std::endl << - " --percentile=P Report performance for the P percentage (0<=P<=100, 0 " - "representing max perf, and 100 representing min perf; (default" - " = " << defaultPercentile << "%)" << std::endl << - " --dumpRefit Print the refittable layers and weights from a refittable " - "engine" << std::endl << - " --dumpOutput Print the output tensor(s) of the last inference iteration " - "(default = disabled)" << std::endl << - " --dumpProfile Print profile information per layer (default = disabled)" << std::endl << - " --dumpLayerInfo Print layer information of the engine to console " - "(default = disabled)" << std::endl << - " --exportTimes= Write the timing results in a json file (default = disabled)" << std::endl << - " --exportOutput= Write the output tensors to a json file (default = disabled)" << std::endl << - " --exportProfile= Write the profile information per layer in a json file " - "(default = disabled)" << std::endl << - " --exportLayerInfo= Write the layer information of the engine in a json file " - "(default = disabled)" << std::endl; -// clang-format on -} - -void helpHelp(std::ostream& os) -{ -// clang-format off - os << "=== Help ===" << std::endl << - " --help, -h Print this message" << std::endl; -// clang-format on -} - -void AllOptions::help(std::ostream& os) -{ - ModelOptions::help(os); - os << std::endl; - BuildOptions::help(os); - os << std::endl; - InferenceOptions::help(os); - os << std::endl; -// clang-format off - os << "=== Build and Inference Batch Options ===" << std::endl << - " When using implicit batch, the max batch size of the engine, if not given, " << std::endl << - " is set to the inference batch size;" << std::endl << - " when using explicit batch, if shapes are specified only for inference, they " << std::endl << - " will be used also as min/opt/max in the build profile; if shapes are " << std::endl << - " specified only for the build, the opt shapes will be used also for inference;" << std::endl << - " if both are specified, they must be compatible; and if explicit batch is " << std::endl << - " enabled but neither is specified, the model must provide complete static" << std::endl << - " dimensions, including batch size, for all inputs" << std::endl << - " Using ONNX models automatically forces explicit batch." << std::endl << - std::endl; - // clang-format on - ReportingOptions::help(os); - os << std::endl; - SystemOptions::help(os); - os << std::endl; - helpHelp(os); -} - -void SafeBuilderOptions::printHelp(std::ostream& os) -{ -// clang-format off - os << "=== Mandatory ===" << std::endl << - " --onnx= ONNX model" << std::endl << - " " << std::endl << - "=== Optional ===" << std::endl << - " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" << std::endl << - " See --outputIOFormats help for the grammar of type and format list." << std::endl << - " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << - " inputs following the same order as network inputs ID (even if only one input" << std::endl << - " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << - " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" << std::endl << - " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << - " outputs following the same order as network outputs ID (even if only one output" << std::endl << - " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << - " IO Formats: spec ::= IOfmt[\",\"spec]" << std::endl << - " IOfmt ::= type:fmt" << std::endl << - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" << std::endl << - " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" << std::endl << - " --int8 Enable int8 precision, in addition to fp16 (default = disabled)" << std::endl << - " --consistency Enable consistency check for serialized engine, (default = disabled)" << std::endl << - " --std Build standard serialized engine, (default = disabled)" << std::endl << - " --calib= Read INT8 calibration cache file" << std::endl << - " --serialized= Save the serialized network" << std::endl << - " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl << - " --verbose or -v Use verbose logging (default = false)" << std::endl << - " --help or -h Print this message" << std::endl << - " " << std::endl; -// clang-format on -} - -} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleOptions.h b/src/Detector/tensorrt_yolo/common_deprecated/sampleOptions.h deleted file mode 100644 index 8975e1ea6..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleOptions.h +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_OPTIONS_H -#define TRT_SAMPLE_OPTIONS_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "NvInfer.h" - -namespace sample -{ - -// Build default params -constexpr int32_t maxBatchNotProvided{0}; -constexpr int32_t defaultMinTiming{1}; -constexpr int32_t defaultAvgTiming{8}; - -// System default params -constexpr int32_t defaultDevice{0}; - -// Inference default params -constexpr int32_t defaultBatch{1}; -constexpr int32_t batchNotProvided{0}; -constexpr int32_t defaultStreams{1}; -constexpr int32_t defaultIterations{10}; -constexpr float defaultWarmUp{200.F}; -constexpr float defaultDuration{3.F}; -constexpr float defaultSleep{}; -constexpr float defaultIdle{}; - -// Reporting default params -constexpr int32_t defaultAvgRuns{10}; -constexpr float defaultPercentile{99}; - -enum class PrecisionConstraints -{ - kNONE, - kOBEY, - kPREFER -}; - -enum class ModelFormat -{ - kANY, - kCAFFE, - kONNX, - kUFF -}; - -enum class SparsityFlag -{ - kDISABLE, - kENABLE, - kFORCE -}; - -enum class TimingCacheMode -{ - kDISABLE, - kLOCAL, - kGLOBAL -}; - -using Arguments = std::unordered_multimap; - -using IOFormat = std::pair; - -using ShapeRange = std::array, nvinfer1::EnumMax()>; - -using LayerPrecisions = std::unordered_map; -using LayerOutputTypes = std::unordered_map>; - -struct Options -{ - virtual void parse(Arguments& arguments) = 0; -}; - -struct BaseModelOptions : public Options -{ - ModelFormat format{ModelFormat::kANY}; - std::string model; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct UffInput : public Options -{ - std::vector> inputs; - bool NHWC{false}; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct ModelOptions : public Options -{ - BaseModelOptions baseModel; - std::string prototxt; - std::vector outputs; - UffInput uffInputs; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct BuildOptions : public Options -{ - int32_t maxBatch{maxBatchNotProvided}; - double workspace{-1.0}; - double dlaSRAM{-1.0}; - double dlaLocalDRAM{-1.0}; - double dlaGlobalDRAM{-1.0}; - int32_t minTiming{defaultMinTiming}; - int32_t avgTiming{defaultAvgTiming}; - bool tf32{true}; - bool fp16{false}; - bool int8{false}; - bool directIO{false}; - PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE}; - LayerPrecisions layerPrecisions; - LayerOutputTypes layerOutputTypes; - bool safe{false}; - bool consistency{false}; - bool restricted{false}; - bool save{false}; - bool load{false}; - bool refittable{false}; - SparsityFlag sparsity{SparsityFlag::kDISABLE}; -#if (NV_TENSORRT_MAJOR > 7) - nvinfer1::ProfilingVerbosity profilingVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY}; -#else - nvinfer1::ProfilingVerbosity profilingVerbosity{ nvinfer1::ProfilingVerbosity::kDEFAULT }; -#endif - std::string engine; - std::string calibration; - std::unordered_map shapes; - std::unordered_map shapesCalib; - std::vector inputFormats; - std::vector outputFormats; - nvinfer1::TacticSources enabledTactics{0}; - nvinfer1::TacticSources disabledTactics{0}; - TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL}; - std::string timingCacheFile{}; - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct SystemOptions : public Options -{ - int32_t device{defaultDevice}; - int32_t DLACore{-1}; - bool fallback{false}; - std::vector plugins; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct InferenceOptions : public Options -{ - int32_t batch{batchNotProvided}; - int32_t iterations{defaultIterations}; - int32_t streams{defaultStreams}; - float warmup{defaultWarmUp}; - float duration{defaultDuration}; - float sleep{defaultSleep}; - float idle{defaultIdle}; - bool overlap{true}; - bool skipTransfers{false}; - bool useManaged{false}; - bool spin{false}; - bool threads{false}; - bool graph{false}; - bool skip{false}; - bool rerun{false}; - bool timeDeserialize{false}; - bool timeRefit{false}; - std::unordered_map inputs; - std::unordered_map> shapes; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct ReportingOptions : public Options -{ - bool verbose{false}; - int32_t avgs{defaultAvgRuns}; - float percentile{defaultPercentile}; - bool refit{false}; - bool output{false}; - bool profile{false}; - bool layerInfo{false}; - std::string exportTimes; - std::string exportOutput; - std::string exportProfile; - std::string exportLayerInfo; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct SafeBuilderOptions : public Options -{ - std::string serialized{}; - std::string onnxModelFile{}; - bool help{false}; - bool verbose{false}; - std::vector inputFormats; - std::vector outputFormats; - bool int8{false}; - std::string calibFile{}; - std::vector plugins; - bool consistency{false}; - bool standard{false}; - - void parse(Arguments& arguments) override; - - static void printHelp(std::ostream& out); -}; - -struct AllOptions : public Options -{ - ModelOptions model; - BuildOptions build; - SystemOptions system; - InferenceOptions inference; - ReportingOptions reporting; - bool helps{false}; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -Arguments argsToArgumentsMap(int32_t argc, char* argv[]); - -bool parseHelp(Arguments& arguments); - -void helpHelp(std::ostream& out); - -// Functions to print options - -std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options); - -std::ostream& operator<<(std::ostream& os, const UffInput& input); - -std::ostream& operator<<(std::ostream& os, const IOFormat& format); - -std::ostream& operator<<(std::ostream& os, const ShapeRange& dims); - -std::ostream& operator<<(std::ostream& os, const ModelOptions& options); - -std::ostream& operator<<(std::ostream& os, const BuildOptions& options); - -std::ostream& operator<<(std::ostream& os, const SystemOptions& options); - -std::ostream& operator<<(std::ostream& os, const InferenceOptions& options); - -std::ostream& operator<<(std::ostream& os, const ReportingOptions& options); - -std::ostream& operator<<(std::ostream& os, const AllOptions& options); - -std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options); - -inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) -{ - for (int32_t i = 0; i < dims.nbDims; ++i) - { - os << (i ? "x" : "") << dims.d[i]; - } - return os; -} -inline std::ostream& operator<<(std::ostream& os, const nvinfer1::WeightsRole role) -{ - switch (role) - { - case nvinfer1::WeightsRole::kKERNEL: - { - os << "Kernel"; - break; - } - case nvinfer1::WeightsRole::kBIAS: - { - os << "Bias"; - break; - } - case nvinfer1::WeightsRole::kSHIFT: - { - os << "Shift"; - break; - } - case nvinfer1::WeightsRole::kSCALE: - { - os << "Scale"; - break; - } - case nvinfer1::WeightsRole::kCONSTANT: - { - os << "Constant"; - break; - } -#if (NV_TENSORRT_MAJOR > 7) - case nvinfer1::WeightsRole::kANY: - { - os << "Any"; - break; - } -#endif - } - - return os; -} - -inline std::ostream& operator<<(std::ostream& os, const std::vector& vec) -{ - for (int32_t i = 0, e = static_cast(vec.size()); i < e; ++i) - { - os << (i ? "x" : "") << vec[i]; - } - return os; -} - -} // namespace sample - -#endif // TRT_SAMPLES_OPTIONS_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleReporting.cpp b/src/Detector/tensorrt_yolo/common_deprecated/sampleReporting.cpp deleted file mode 100644 index a92938c5b..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleReporting.cpp +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "sampleInference.h" -#include "sampleOptions.h" -#include "sampleReporting.h" - -namespace sample -{ - -namespace -{ - -//! -//! \brief Find percentile in an ascending sequence of timings -//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown. -//! -template -float findPercentile(float percentile, std::vector const& timings, T const& toFloat) -{ - int32_t const all = static_cast(timings.size()); - int32_t const exclude = static_cast((1 - percentile / 100) * all); - if (timings.empty()) - { - return std::numeric_limits::infinity(); - } - if (percentile < 0.0f || percentile > 100.0f) - { - throw std::runtime_error("percentile is not in [0, 100]!"); - } - return toFloat(timings[std::max(all - 1 - exclude, 0)]); -} - -//! -//! \brief Find median in a sorted sequence of timings -//! -template -float findMedian(std::vector const& timings, T const& toFloat) -{ - if (timings.empty()) - { - return std::numeric_limits::infinity(); - } - - int32_t const m = timings.size() / 2; - if (timings.size() % 2) - { - return toFloat(timings[m]); - } - - return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2; -} - -//! -//! \brief Find coefficient of variance (which is std / mean) in a sorted sequence of timings given the mean -//! -template -float findCoeffOfVariance(std::vector const& timings, T const& toFloat, float mean) -{ - if (timings.empty()) - { - return 0; - } - - if (mean == 0.F) - { - return std::numeric_limits::infinity(); - } - - auto const metricAccumulator = [toFloat, mean](float acc, InferenceTime const& a) { - float const diff = toFloat(a) - mean; - return acc + diff * diff; - }; - float const variance = std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / timings.size(); - - return std::sqrt(variance) / mean * 100.F; -} - -inline InferenceTime traceToTiming(const InferenceTrace& a) -{ - return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart), (a.computeEnd - a.computeStart), - (a.d2hEnd - a.d2hStart), (a.d2hEnd - a.h2dStart)); -} - -} // namespace - -void printProlog(int32_t warmups, int32_t timings, float warmupMs, float benchTimeMs, std::ostream& os) -{ - os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" << std::endl; - os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000 << " s" << std::endl; -} - -void printTiming(std::vector const& timings, int32_t runsPerAvg, std::ostream& os) -{ - int32_t count = 0; - InferenceTime sum; - - os << std::endl; - os << "=== Trace details ===" << std::endl; - os << "Trace averages of " << runsPerAvg << " runs:" << std::endl; - for (auto const& t : timings) - { - sum += t; - - if (++count == runsPerAvg) - { - // clang-format off - os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg - << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg - << " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl; - // clang-format on - count = 0; - sum.enq = 0; - sum.h2d = 0; - sum.compute = 0; - sum.d2h = 0; - sum.e2e = 0; - } - } -} - -void printMetricExplanations(std::ostream& os) -{ - os << std::endl; - os << "=== Explanations of the performance metrics ===" << std::endl; - os << "Total Host Walltime: the host walltime from when the first query (after warmups) is enqueued to when the " - "last query is completed." - << std::endl; - os << "GPU Compute Time: the GPU latency to execute the kernels for a query." << std::endl; - os << "Total GPU Compute Time: the summation of the GPU Compute Time of all the queries. If this is significantly " - "shorter than Total Host Walltime, the GPU may be under-utilized because of host-side overheads or data " - "transfers." - << std::endl; - os << "Throughput: the observed throughput computed by dividing the number of queries by the Total Host Walltime. " - "If this is significantly lower than the reciprocal of GPU Compute Time, the GPU may be under-utilized " - "because of host-side overheads or data transfers." - << std::endl; - os << "Enqueue Time: the host latency to enqueue a query. If this is longer than GPU Compute Time, the GPU may be " - "under-utilized." - << std::endl; - os << "H2D Latency: the latency for host-to-device data transfers for input tensors of a single query." - << std::endl; - os << "D2H Latency: the latency for device-to-host data transfers for output tensors of a single query." - << std::endl; - os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H Latency. This is the latency to infer a " - "single query." - << std::endl; - os << "End-to-End Host Latency: the duration from when the H2D of a query is called to when the D2H of the same " - "query is completed, which includes the latency to wait for the completion of the previous query. This is " - "the latency of a query if multiple queries are enqueued consecutively." - << std::endl; -} - -PerformanceResult getPerformanceResult(std::vector const& timings, - std::function metricGetter, float percentile) -{ - auto const metricComparator - = [metricGetter](InferenceTime const& a, InferenceTime const& b) { return metricGetter(a) < metricGetter(b); }; - auto const metricAccumulator = [metricGetter](float acc, InferenceTime const& a) { return acc + metricGetter(a); }; - std::vector newTimings = timings; - std::sort(newTimings.begin(), newTimings.end(), metricComparator); - PerformanceResult result; - result.min = metricGetter(newTimings.front()); - result.max = metricGetter(newTimings.back()); - result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f, metricAccumulator) / newTimings.size(); - result.median = findMedian(newTimings, metricGetter); - result.percentile = findPercentile(percentile, newTimings, metricGetter); - result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean); - return result; -} - -void printEpilog(std::vector const& timings, float walltimeMs, float percentile, int32_t batchSize, - std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) -{ - float const throughput = batchSize * timings.size() / walltimeMs * 1000; - - auto const getLatency = [](InferenceTime const& t) { return t.latency(); }; - auto const latencyResult = getPerformanceResult(timings, getLatency, percentile); - - auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; }; - auto const e2eLatencyResult = getPerformanceResult(timings, getEndToEnd, percentile); - - auto const getEnqueue = [](InferenceTime const& t) { return t.enq; }; - auto const enqueueResult = getPerformanceResult(timings, getEnqueue, percentile); - - auto const getH2d = [](InferenceTime const& t) { return t.h2d; }; - auto const h2dResult = getPerformanceResult(timings, getH2d, percentile); - - auto const getCompute = [](InferenceTime const& t) { return t.compute; }; - auto const gpuComputeResult = getPerformanceResult(timings, getCompute, percentile); - - auto const getD2h = [](InferenceTime const& t) { return t.d2h; }; - auto const d2hResult = getPerformanceResult(timings, getD2h, percentile); - - auto const toPerfString = [percentile](const PerformanceResult& r) { - std::stringstream s; - s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean << " ms, " - << "median = " << r.median << " ms, percentile(" << percentile << "%) = " << r.percentile << " ms"; - return s.str(); - }; - - osInfo << std::endl; - osInfo << "=== Performance summary ===" << std::endl; - osInfo << "Throughput: " << throughput << " qps" << std::endl; - osInfo << "Latency: " << toPerfString(latencyResult) << std::endl; - osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult) << std::endl; - osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl; - osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl; - osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl; - osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl; - osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl; - osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl; - - // Report warnings if the throughput is bound by other factors than GPU Compute Time. - constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F}; - if (enqueueResult.median > kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) - { - osWarning - << "* Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized." - << std::endl; - osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the " - "throughput." - << std::endl; - } - if (h2dResult.median >= gpuComputeResult.median) - { - osWarning << "* Throughput may be bound by host-to-device transfers for the inputs rather than GPU Compute and " - "the GPU may be under-utilized." - << std::endl; - osWarning << " Add --noDataTransfers flag to disable data transfers." << std::endl; - } - if (d2hResult.median >= gpuComputeResult.median) - { - osWarning << "* Throughput may be bound by device-to-host transfers for the outputs rather than GPU Compute " - "and the GPU may be under-utilized." - << std::endl; - osWarning << " Add --noDataTransfers flag to disable data transfers." << std::endl; - } - - // Report warnings if the GPU Compute Time is unstable. - constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F}; - if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) - { - osWarning << "* GPU compute time is unstable, with coefficient of variance = " << gpuComputeResult.coeffVar - << "%." << std::endl; - osWarning << " If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the " - << "stability." << std::endl; - } - - // Explain what the metrics mean. - osInfo << "Explanations of the performance metrics are printed in the verbose logs." << std::endl; - printMetricExplanations(osVerbose); - - osInfo << std::endl; -} - -void printPerformanceReport(std::vector const& trace, const ReportingOptions& reporting, float warmupMs, - int32_t batchSize, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) -{ - auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { return a.computeStart >= warmupMs; }; - auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup); - int32_t const warmups = noWarmup - trace.begin(); - float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart; - // when implicit batch used, batchSize = options.inference.batch, which is parsed through --batch - // when explicit batch used, batchSize = options.inference.batch = 0 - // treat inference with explicit batch as a single query and report the throughput - batchSize = batchSize ? batchSize : 1; - printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo); - - std::vector timings(trace.size() - warmups); - std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming); - printTiming(timings, reporting.avgs, osInfo); - printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo, osWarning, osVerbose); - - if (!reporting.exportTimes.empty()) - { - exportJSONTrace(trace, reporting.exportTimes); - } -} - -//! Printed format: -//! [ value, ...] -//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end h2d" : time, "start compute" : time, -//! "end compute" : time, "start d2h" : time, "end d2h" : time, "h2d" : time, "compute" : time, -//! "d2h" : time, "latency" : time, "end to end" : time } -//! -void exportJSONTrace(std::vector const& trace, std::string const& fileName) -{ - std::ofstream os(fileName, std::ofstream::trunc); - os << "[" << std::endl; - char const* sep = " "; - for (auto const& t : trace) - { - InferenceTime const it(traceToTiming(t)); - os << sep << "{ "; - sep = ", "; - // clang-format off - os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep - << "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep - << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep - << "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep - << "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep - << "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep - << "\"endToEndMs\" : " << it.e2e << " }" << std::endl; - // clang-format on - } - os << "]" << std::endl; -} - -void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept -{ - if (mIterator == mLayers.end()) - { - bool const first = !mLayers.empty() && mLayers.begin()->name == layerName; - mUpdatesCount += mLayers.empty() || first; - if (first) - { - mIterator = mLayers.begin(); - } - else - { - mLayers.emplace_back(); - mLayers.back().name = layerName; - mIterator = mLayers.end() - 1; - } - } - - mIterator->timeMs += timeMs; - ++mIterator; -} - -void Profiler::print(std::ostream& os) const noexcept -{ - std::string const nameHdr("Layer"); - std::string const timeHdr(" Time (ms)"); - std::string const avgHdr(" Avg. Time (ms)"); - std::string const percentageHdr(" Time %"); - - float const totalTimeMs = getTotalTime(); - - auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) { return a.name.size() < b.name.size(); }; - auto const longestName = std::max_element(mLayers.begin(), mLayers.end(), cmpLayer); - auto const nameLength = std::max(longestName->name.size() + 1, nameHdr.size()); - auto const timeLength = timeHdr.size(); - auto const avgLength = avgHdr.size(); - auto const percentageLength = percentageHdr.size(); - - os << std::endl - << "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl - << std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr << std::endl; - - for (auto const& p : mLayers) - { - // clang-format off - os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs - << std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100 - << std::endl; - } - { - os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2) - << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl; - // clang-format on - } - os << std::endl; -} - -void Profiler::exportJSONProfile(std::string const& fileName) const noexcept -{ - std::ofstream os(fileName, std::ofstream::trunc); - os << "[" << std::endl << " { \"count\" : " << mUpdatesCount << " }" << std::endl; - - auto const totalTimeMs = getTotalTime(); - - for (auto const& l : mLayers) - { - // clang-format off - os << ", {" << " \"name\" : \"" << l.name << "\"" - ", \"timeMs\" : " << l.timeMs - << ", \"averageMs\" : " << l.timeMs / mUpdatesCount - << ", \"percentage\" : " << l.timeMs / totalTimeMs * 100 - << " }" << std::endl; - // clang-format on - } - os << "]" << std::endl; -} - -void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os) -{ - os << "Input Tensors:" << std::endl; - bindings.dumpInputs(context, os); -} - -void dumpOutputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os) -{ - os << "Output Tensors:" << std::endl; - bindings.dumpOutputs(context, os); -} - -void exportJSONOutput( - nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::string const& fileName, int32_t batch) -{ - std::ofstream os(fileName, std::ofstream::trunc); - std::string sep = " "; - auto const output = bindings.getOutputBindings(); - os << "[" << std::endl; - for (auto const& binding : output) - { - // clang-format off - os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl; - sep = ", "; - os << " " << sep << "\"dimensions\" : \""; - bindings.dumpBindingDimensions(binding.second, context, os); - os << "\"" << std::endl; - os << " " << sep << "\"values\" : [ "; - bindings.dumpBindingValues(context, binding.second, os, sep, batch); - os << " ]" << std::endl << " }" << std::endl; - // clang-format on - } - os << "]" << std::endl; -} - -} // namespace sample diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleReporting.h b/src/Detector/tensorrt_yolo/common_deprecated/sampleReporting.h deleted file mode 100644 index 5f7309872..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleReporting.h +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_REPORTING_H -#define TRT_SAMPLE_REPORTING_H - -#include -#include - -#include "NvInfer.h" - -#include "sampleOptions.h" -#include "sampleUtils.h" - -namespace sample -{ - -//! -//! \struct InferenceTime -//! \brief Measurement times in milliseconds -//! -struct InferenceTime -{ - InferenceTime(float q, float i, float c, float o, float e) - : enq(q) - , h2d(i) - , compute(c) - , d2h(o) - , e2e(e) - { - } - - InferenceTime() = default; - InferenceTime(InferenceTime const&) = default; - InferenceTime(InferenceTime&&) = default; - InferenceTime& operator=(InferenceTime const&) = default; - InferenceTime& operator=(InferenceTime&&) = default; - ~InferenceTime() = default; - - float enq{0}; // Enqueue - float h2d{0}; // Host to Device - float compute{0}; // Compute - float d2h{0}; // Device to Host - float e2e{0}; // end to end - - // ideal latency - float latency() const - { - return h2d + compute + d2h; - } -}; - -//! -//! \struct InferenceTrace -//! \brief Measurement points in milliseconds -//! -struct InferenceTrace -{ - InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, float ce, float os, float oe) - : stream(s) - , enqStart(es) - , enqEnd(ee) - , h2dStart(is) - , h2dEnd(ie) - , computeStart(cs) - , computeEnd(ce) - , d2hStart(os) - , d2hEnd(oe) - { - } - - InferenceTrace() = default; - InferenceTrace(InferenceTrace const&) = default; - InferenceTrace(InferenceTrace&&) = default; - InferenceTrace& operator=(InferenceTrace const&) = default; - InferenceTrace& operator=(InferenceTrace&&) = default; - ~InferenceTrace() = default; - - int32_t stream{0}; - float enqStart{0}; - float enqEnd{0}; - float h2dStart{0}; - float h2dEnd{0}; - float computeStart{0}; - float computeEnd{0}; - float d2hStart{0}; - float d2hEnd{0}; -}; - -inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) -{ - return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h, a.e2e + b.e2e); -} - -inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) -{ - return a = a + b; -} - -//! -//! \struct PerformanceResult -//! \brief Performance result of a performance metric -//! -struct PerformanceResult -{ - float min{0}; - float max{0}; - float mean{0}; - float median{0}; - float percentile{0}; - float coeffVar{0}; // coefficient of variation -}; - -//! -//! \brief Print benchmarking time and number of traces collected -//! -void printProlog(int32_t warmups, int32_t timings, float warmupMs, float walltime, std::ostream& os); - -//! -//! \brief Print a timing trace -//! -void printTiming(std::vector const& timings, int32_t runsPerAvg, std::ostream& os); - -//! -//! \brief Print the performance summary of a trace -//! -void printEpilog(std::vector const& timings, float percentile, int32_t batchSize, std::ostream& osInfo, - std::ostream& osWarning, std::ostream& osVerbose); - -//! -//! \brief Get the result of a specific performance metric from a trace -//! -PerformanceResult getPerformanceResult(std::vector const& timings, - std::function metricGetter, float percentile); - -//! -//! \brief Print the explanations of the performance metrics printed in printEpilog() function. -//! -void printMetricExplanations(std::ostream& os); - -//! -//! \brief Print and summarize a timing trace -//! -void printPerformanceReport(std::vector const& trace, ReportingOptions const& reporting, float warmupMs, - int32_t batchSize, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose); - -//! -//! \brief Export a timing trace to JSON file -//! -void exportJSONTrace(std::vector const& trace, std::string const& fileName); - -//! -//! \brief Print input tensors to stream -//! -void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os); - -//! -//! \brief Print output tensors to stream -//! -void dumpOutputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os); - -//! -//! \brief Export output tensors to JSON file -//! -void exportJSONOutput( - nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::string const& fileName, int32_t batch); - -//! -//! \struct LayerProfile -//! \brief Layer profile information -//! -struct LayerProfile -{ - std::string name; - float timeMs{0}; -}; - -//! -//! \class Profiler -//! \brief Collect per-layer profile information, assuming times are reported in the same order -//! -class Profiler : public nvinfer1::IProfiler -{ - -public: - void reportLayerTime(char const* layerName, float timeMs) noexcept override; - - void print(std::ostream& os) const noexcept; - - //! - //! \brief Export a profile to JSON file - //! - void exportJSONProfile(std::string const& fileName) const noexcept; - -private: - float getTotalTime() const noexcept - { - auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs; }; - return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime); - } - - std::vector mLayers; - std::vector::iterator mIterator{mLayers.begin()}; - int32_t mUpdatesCount{0}; -}; - -} // namespace sample - -#endif // TRT_SAMPLE_REPORTING_H diff --git a/src/Detector/tensorrt_yolo/common_deprecated/sampleUtils.h b/src/Detector/tensorrt_yolo/common_deprecated/sampleUtils.h deleted file mode 100644 index 1509a7fcd..000000000 --- a/src/Detector/tensorrt_yolo/common_deprecated/sampleUtils.h +++ /dev/null @@ -1,543 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_UTILS_H -#define TRT_SAMPLE_UTILS_H - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "NvInfer.h" - -#include "common.h" -#include "logger.h" -#include "sampleDevice.h" -#include "sampleOptions.h" - -namespace sample -{ - -inline int dataTypeSize(nvinfer1::DataType dataType) -{ - switch (dataType) - { - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kFLOAT: return 4; - case nvinfer1::DataType::kHALF: return 2; - case nvinfer1::DataType::kBOOL: - case nvinfer1::DataType::kINT8: return 1; - } - return 0; -} - -template -inline T roundUp(T m, T n) -{ - return ((m + n - 1) / n) * n; -} - -inline int volume(const nvinfer1::Dims& d) -{ - return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); -} - -//! comps is the number of components in a vector. Ignored if vecDim < 0. -inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides, int vecDim, int comps, int batch) -{ - int maxNbElems = 1; - for (int i = 0; i < dims.nbDims; ++i) - { - // Get effective length of axis. - int d = dims.d[i]; - // Any dimension is 0, it is an empty tensor. - if (d == 0) - { - return 0; - } - if (i == vecDim) - { - d = samplesCommon::divUp(d, comps); - } - maxNbElems = std::max(maxNbElems, d * strides.d[i]); - } - return static_cast(maxNbElems) * batch * (vecDim < 0 ? 1 : comps); -} - -inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) -{ - if (vecDim != -1) - { - dims.d[vecDim] = roundUp(dims.d[vecDim], comps); - } - return volume(dims) * std::max(batch, 1); -} - -inline nvinfer1::Dims toDims(const std::vector& vec) -{ - int limit = static_cast(nvinfer1::Dims::MAX_DIMS); - if (static_cast(vec.size()) > limit) - { - sample::gLogWarning << "Vector too long, only first 8 elements are used in dimension." << std::endl; - } - // Pick first nvinfer1::Dims::MAX_DIMS elements - nvinfer1::Dims dims{std::min(static_cast(vec.size()), limit), {}}; - std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d)); - return dims; -} - -template -inline void fillBuffer(void* buffer, int64_t volume, T min, T max) -{ - T* typedBuffer = static_cast(buffer); - std::default_random_engine engine; - if (std::is_integral::value) - { - std::uniform_int_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; - std::generate(typedBuffer, typedBuffer + volume, generator); - } - else - { - std::uniform_real_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; - std::generate(typedBuffer, typedBuffer + volume, generator); - } -} - -// Specialization needed for custom type __half -template -inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) -{ - H* typedBuffer = static_cast(buffer); - std::default_random_engine engine; - std::uniform_real_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; - std::generate(typedBuffer, typedBuffer + volume, generator); -} -template <> -inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max) -{ - fillBufferHalf(buffer, volume, min, max); -} - -template -inline void dumpBuffer(const void* buffer, const std::string& separator, std::ostream& os, const nvinfer1::Dims& dims, - const nvinfer1::Dims& strides, int32_t vectorDim, int32_t spv) -{ - const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies()); - const T* typedBuffer = static_cast(buffer); - std::string sep; - for (int64_t v = 0; v < volume; ++v) - { - int64_t curV = v; - int32_t dataOffset = 0; - for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) - { - int32_t dimVal = curV % dims.d[dimIndex]; - if (dimIndex == vectorDim) - { - dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv; - } - else - { - dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv); - } - curV /= dims.d[dimIndex]; - ASSERT(curV >= 0); - } - - os << sep << typedBuffer[dataOffset]; - sep = separator; - } -} - -inline void loadFromFile(std::string const& fileName, char* dst, size_t size) -{ - ASSERT(dst); - - std::ifstream file(fileName, std::ios::in | std::ios::binary); - if (file.is_open()) - { - file.read(dst, size); - file.close(); - } - else - { - std::stringstream msg; - msg << "Cannot open file " << fileName << "!"; - throw std::invalid_argument(msg.str()); - } -} - -struct Binding -{ - bool isInput{false}; - std::unique_ptr buffer; - int64_t volume{0}; - nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT}; - - void fill(const std::string& fileName) - { - loadFromFile(fileName, static_cast(buffer->getHostBuffer()), buffer->getSize()); - } - - void fill() - { - switch (dataType) - { - case nvinfer1::DataType::kBOOL: - { - fillBuffer(buffer->getHostBuffer(), volume, 0, 1); - break; - } - case nvinfer1::DataType::kINT32: - { - fillBuffer(buffer->getHostBuffer(), volume, -128, 127); - break; - } - case nvinfer1::DataType::kINT8: - { - fillBuffer(buffer->getHostBuffer(), volume, -128, 127); - break; - } - case nvinfer1::DataType::kFLOAT: - { - fillBuffer(buffer->getHostBuffer(), volume, -1.0F, 1.0F); - break; - } - case nvinfer1::DataType::kHALF: - { - fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F); - break; - } - } - } - - void dump(std::ostream& os, nvinfer1::Dims dims, nvinfer1::Dims strides, int32_t vectorDim, int32_t spv, - const std::string separator = " ") const - { - switch (dataType) - { - case nvinfer1::DataType::kBOOL: - { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - case nvinfer1::DataType::kINT32: - { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - case nvinfer1::DataType::kINT8: - { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - case nvinfer1::DataType::kFLOAT: - { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - case nvinfer1::DataType::kHALF: - { - dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides, vectorDim, spv); - break; - } - } - } -}; - -class Bindings -{ -public: - Bindings() = delete; - explicit Bindings(bool useManaged) - : mUseManaged(useManaged) - { - } - - void addBinding(int b, const std::string& name, bool isInput, int64_t volume, nvinfer1::DataType dataType, - const std::string& fileName = "") - { - while (mBindings.size() <= static_cast(b)) - { - mBindings.emplace_back(); - mDevicePointers.emplace_back(); - } - mNames[name] = b; - if (mBindings[b].buffer == nullptr) - { - if (mUseManaged) - mBindings[b].buffer.reset(new UnifiedMirroredBuffer); - else - mBindings[b].buffer.reset(new DiscreteMirroredBuffer); - } - mBindings[b].isInput = isInput; - // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr - // even for empty tensors, so allocate a dummy byte. - if (volume == 0) - mBindings[b].buffer->allocate(1); - else - mBindings[b].buffer->allocate(static_cast(volume) * static_cast(dataTypeSize(dataType))); - - mBindings[b].volume = volume; - mBindings[b].dataType = dataType; - mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer(); - if (isInput) - { - if (fileName.empty()) - fill(b); - else - fill(b, fileName); - } - } - - void** getDeviceBuffers() - { - return mDevicePointers.data(); - } - - void transferInputToDevice(TrtCudaStream& stream) - { - for (auto& b : mNames) - { - if (mBindings[b.second].isInput) - mBindings[b.second].buffer->hostToDevice(stream); - } - } - - void transferOutputToHost(TrtCudaStream& stream) - { - for (auto& b : mNames) - { - if (!mBindings[b.second].isInput) - mBindings[b.second].buffer->deviceToHost(stream); - } - } - - void fill(int binding, const std::string& fileName) - { - mBindings[binding].fill(fileName); - } - - void fill(int binding) - { - mBindings[binding].fill(); - } - - void dumpBindingDimensions(int binding, const nvinfer1::IExecutionContext& context, std::ostream& os) const - { - const auto dims = context.getBindingDimensions(binding); - // Do not add a newline terminator, because the caller may be outputting a JSON string. - os << dims; - } - - void dumpBindingValues(const nvinfer1::IExecutionContext& context, int binding, std::ostream& os, - const std::string& separator = " ", int32_t batch = 1) const - { - nvinfer1::Dims dims = context.getBindingDimensions(binding); - nvinfer1::Dims strides = context.getStrides(binding); - int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding); - const int32_t spv = context.getEngine().getBindingComponentsPerElement(binding); - - if (context.getEngine().hasImplicitBatchDimension()) - { - auto insertN = [](nvinfer1::Dims& d, int32_t bs) { - const int32_t nbDims = d.nbDims; - ASSERT(nbDims < nvinfer1::Dims::MAX_DIMS); - std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]); - d.d[0] = bs; - d.nbDims = nbDims + 1; - }; - int32_t batchStride = 0; - for (int32_t i = 0; i < strides.nbDims; ++i) - { - if (strides.d[i] * dims.d[i] > batchStride) - { - batchStride = strides.d[i] * dims.d[i]; - } - } - insertN(dims, batch); - insertN(strides, batchStride); - vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1; - } - - mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator); - } - - void dumpInputs(const nvinfer1::IExecutionContext& context, std::ostream& os) const - { - auto isInput = [](const Binding& b) { return b.isInput; }; - dumpBindings(context, isInput, os); - } - - void dumpOutputs(const nvinfer1::IExecutionContext& context, std::ostream& os) const - { - auto isOutput = [](const Binding& b) { return !b.isInput; }; - dumpBindings(context, isOutput, os); - } - - void dumpBindings(const nvinfer1::IExecutionContext& context, std::ostream& os) const - { - auto all = [](const Binding& /*b*/) { return true; }; - dumpBindings(context, all, os); - } - - void dumpBindings( - const nvinfer1::IExecutionContext& context, bool (*predicate)(const Binding& b), std::ostream& os) const - { - for (const auto& n : mNames) - { - const auto binding = n.second; - if (predicate(mBindings[binding])) - { - os << n.first << ": ("; - dumpBindingDimensions(binding, context, os); - os << ")" << std::endl; - - dumpBindingValues(context, binding, os); - os << std::endl; - } - } - } - - std::unordered_map getInputBindings() const - { - auto isInput = [](const Binding& b) { return b.isInput; }; - return getBindings(isInput); - } - - std::unordered_map getOutputBindings() const - { - auto isOutput = [](const Binding& b) { return !b.isInput; }; - return getBindings(isOutput); - } - - std::unordered_map getBindings() const - { - auto all = [](const Binding& /*b*/) { return true; }; - return getBindings(all); - } - - std::unordered_map getBindings(bool (*predicate)(const Binding& b)) const - { - std::unordered_map bindings; - for (const auto& n : mNames) - { - const auto binding = n.second; - if (predicate(mBindings[binding])) - bindings.insert(n); - } - return bindings; - } - -private: - std::unordered_map mNames; - std::vector mBindings; - std::vector mDevicePointers; - bool mUseManaged{false}; -}; - -template -struct TrtDestroyer -{ - void operator()(T* t) - { - //t->destroy(); - delete t; - } -}; - -template -using TrtUniquePtr = std::unique_ptr>; - -inline bool broadcastIOFormats(const std::vector& formats, size_t nbBindings, bool isInput = true) -{ - bool broadcast = formats.size() == 1; - bool validFormatsCount = broadcast || (formats.size() == nbBindings); - if (!formats.empty() && !validFormatsCount) - { - if (isInput) - { - throw std::invalid_argument( - "The number of inputIOFormats must match network's inputs or be one for broadcasting."); - } - else - { - throw std::invalid_argument( - "The number of outputIOFormats must match network's outputs or be one for broadcasting."); - } - } - return broadcast; -} - -inline std::vector loadTimingCacheFile(const std::string inFileName) -{ - std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); - if (!iFile) - { - sample::gLogWarning << "Could not read timing cache from: " << inFileName - << ". A new timing cache will be generated and written." << std::endl; - return std::vector(); - } - iFile.seekg(0, std::ifstream::end); - size_t fsize = iFile.tellg(); - iFile.seekg(0, std::ifstream::beg); - std::vector content(fsize); - iFile.read(content.data(), fsize); - iFile.close(); - sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from " << inFileName << std::endl; - return content; -} - -inline void saveTimingCacheFile(const std::string outFileName, const nvinfer1::IHostMemory* blob) -{ - std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); - if (!oFile) - { - sample::gLogWarning << "Could not write timing cache to: " << outFileName << std::endl; - return; - } - oFile.write((char*) blob->data(), blob->size()); - oFile.close(); - sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to " << outFileName << std::endl; -} - -inline int32_t getCudaDriverVersion() -{ - int32_t version{-1}; - cudaCheck(cudaDriverGetVersion(&version)); - return version; -} - -inline int32_t getCudaRuntimeVersion() -{ - int32_t version{-1}; - cudaCheck(cudaRuntimeGetVersion(&version)); - return version; -} - -} // namespace sample - -#endif // TRT_SAMPLE_UTILS_H diff --git a/src/Detector/tensorrt_yolo/ds_image.cpp b/src/Detector/tensorrt_yolo/ds_image.cpp index b801b8747..77404f974 100644 --- a/src/Detector/tensorrt_yolo/ds_image.cpp +++ b/src/Detector/tensorrt_yolo/ds_image.cpp @@ -50,7 +50,8 @@ DsImage::DsImage(const cv::Mat& mat_image_, tensor_rt::ModelType net_type, const if (tensor_rt::ModelType::YOLOV5 == net_type || tensor_rt::ModelType::YOLOV6 == net_type || tensor_rt::ModelType::YOLOV7 == net_type || tensor_rt::ModelType::YOLOV7Mask == net_type || tensor_rt::ModelType::YOLOV8 == net_type || tensor_rt::ModelType::YOLOV8_OBB == net_type || tensor_rt::ModelType::YOLOV8Mask == net_type || - tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type) + tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type || + tensor_rt::ModelType::YOLOV11 == net_type || tensor_rt::ModelType::YOLOV11_OBB == net_type || tensor_rt::ModelType::YOLOV11Mask == net_type) { // resize the DsImage with scale float r = std::min(static_cast(inputH) / static_cast(m_Height), static_cast(inputW) / static_cast(m_Width)); @@ -101,7 +102,8 @@ DsImage::DsImage(const std::string& path, tensor_rt::ModelType net_type, const i if (tensor_rt::ModelType::YOLOV5 == net_type || tensor_rt::ModelType::YOLOV6 == net_type || tensor_rt::ModelType::YOLOV7 == net_type || tensor_rt::ModelType::YOLOV7Mask == net_type || tensor_rt::ModelType::YOLOV8 == net_type || tensor_rt::ModelType::YOLOV8_OBB == net_type || tensor_rt::ModelType::YOLOV8Mask == net_type || - tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type) + tensor_rt::ModelType::YOLOV9 == net_type || tensor_rt::ModelType::YOLOV10 == net_type || + tensor_rt::ModelType::YOLOV11 == net_type || tensor_rt::ModelType::YOLOV11_OBB == net_type || tensor_rt::ModelType::YOLOV11Mask == net_type) { // resize the DsImage with scale float dim = std::max(m_Height, m_Width);