diff --git a/src/Detector/darknet/CMakeLists.txt b/src/Detector/darknet/CMakeLists.txt index 26c2185f9..dec1ff078 100644 --- a/src/Detector/darknet/CMakeLists.txt +++ b/src/Detector/darknet/CMakeLists.txt @@ -76,10 +76,6 @@ set(DARKNET_HEADER_FILES ${DARKNET_HEADER_FILES} include/darknet.h include/yolo_ file(GLOB DARKNET_CUDA_FILES src/*.cu) -add_definitions(-DYOLO_DETECTOR_EXPORTS) -add_definitions(-DLIB_EXPORTS) -add_definitions(-DGPU) -add_definitions(-DCUDNN) # add_definitions(-DOPENCV) #if(NOT CMAKE_DEBUG_POSTFIX) @@ -107,6 +103,13 @@ set(YOLO_LIBS target_link_libraries(${libname} ${YOLO_LIBS}) +target_compile_definitions(${libname} PRIVATE -DYOLO_DETECTOR_EXPORTS) +target_compile_definitions(${libname} PRIVATE -DLIB_EXPORTS) +target_compile_definitions(${libname} PRIVATE -DGPU) +target_compile_definitions(${libname} PRIVATE -DCUDNN) +target_compile_definitions(${libname} PRIVATE -DCUDNN_HALF) + + install(TARGETS ${libname} EXPORT MTTrackingExports ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib diff --git a/src/Detector/darknet/cmake/FindCUDNN.cmake b/src/Detector/darknet/cmake/FindCUDNN.cmake index 37388d30b..7a692b055 100644 --- a/src/Detector/darknet/cmake/FindCUDNN.cmake +++ b/src/Detector/darknet/cmake/FindCUDNN.cmake @@ -1,180 +1,104 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. +# Distributed under the OSI-approved BSD 3-Clause License. +# Copyright Stefano Sinigardi + #.rst: # FindCUDNN -# ------- -# -# Find CUDNN library -# -# Valiables that affect result: -# , , : as usual -# -# : as usual, plus we do find '5.1' version if you wanted '5' -# (not if you wanted '5.0', as usual) +# -------- # -# Result variables +# Result Variables # ^^^^^^^^^^^^^^^^ # -# This module will set the following variables in your project: +# This module will set the following variables in your project:: # -# ``CUDNN_INCLUDE`` -# where to find cudnn.h. -# ``CUDNN_LIBRARY`` -# the libraries to link against to use CUDNN. -# ``CUDNN_FOUND`` -# If false, do not try to use CUDNN. -# ``CUDNN_VERSION`` -# Version of the CUDNN library we looked for +# ``CUDNN_FOUND`` +# True if CUDNN found on the local system # -# Exported functions -# ^^^^^^^^^^^^^^^^ -# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir]) -# This function will try to download and install CUDNN. -# CUDNN5 and CUDNN6 are supported. +# ``CUDNN_INCLUDE_DIRS`` +# Location of CUDNN header files. # +# ``CUDNN_LIBRARIES`` +# The CUDNN libraries. +# +# ``CuDNN::CuDNN`` +# The CUDNN target # -function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir) - message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}") - string(REGEX REPLACE "-rc$" "" version_base "${version}") - set(tar_libdir cuda/lib64) - set(tar_incdir cuda/include) - - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(url_extension tgz) - if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(url_arch_name linux-x64 ) - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc") - set(url_arch_name linux-ppc64le ) - # TX1 has to be installed via JetPack - endif() - elseif (APPLE) - set(url_extension tgz) - set(tar_libdir cuda/lib) - set(url_arch_name osx-x64) - elseif(WIN32) - set(url_extension zip) - set(tar_bindir cuda/bin) - set(tar_libdir cuda/lib/x64) - if(CMAKE_SYSTEM_VERSION MATCHES "10") - set(url_arch_name windows10-x64) - else() - set(url_arch_name windows7-x64) - endif() - endif() - - # Download and install CUDNN locally if not found on the system - if(url_arch_name) - set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version}) - file(MAKE_DIRECTORY ${download_dir}) - set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension}) - set(base_url http://developer.download.nvidia.com/compute/redist/cudnn) - set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename}) - set(cudnn_file ${download_dir}/${cudnn_filename}) - - if(NOT EXISTS ${cudnn_file}) - message(STATUS "Downloading CUDNN library from NVIDIA...") - file(DOWNLOAD ${cudnn_url} ${cudnn_file} - SHOW_PROGRESS STATUS cudnn_status - ) - execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status) - - if(NOT "${cudnn_status}" MATCHES "0") - message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN") - endif() - endif() - - if(dest_bindir AND tar_bindir) - file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir}) - endif() - - if(dest_incdir) - file(COPY ${download_dir}/${tar_incdir}/ DESTINATION ${dest_incdir}) - endif() - - file(COPY ${download_dir}/${tar_libdir}/ DESTINATION ${dest_libdir} ) - - get_filename_component(dest_dir ${dest_libdir} DIRECTORY) - - set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE) - unset(CUDNN_LIBRARY CACHE) - unset(CUDNN_INCLUDE_DIR CACHE) - - endif(url_arch_name) -endfunction() - -##################################################### - -find_package(PkgConfig) -pkg_check_modules(PC_CUDNN QUIET CUDNN) - -get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH) - -# We use major only in library search as major/minor is not entirely consistent among platforms. -# Also, looking for exact minor version of .so is in general not a good idea. -# More strict enforcement of minor/patch version is done if/when the header file is examined. -if(CUDNN_FIND_VERSION_EXACT) - SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}") - SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR}) -else() - SET(__cudnn_lib_win_name cudnn64) +include(FindPackageHandleStandardArgs) + +find_path(CUDNN_INCLUDE_DIR NAMES cudnn.h cudnn_v8.h cudnn_v7.h + HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/include + PATH_SUFFIXES cuda/include include) +find_library(CUDNN_LIBRARY NAMES cudnn cudnn8 cudnn7 + HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/lib/x86_64-linux-gnu/ + PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64 cuda/lib/x64) +if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v8.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v8.h CUDNN_HEADER_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v7.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v7.h CUDNN_HEADER_CONTENTS) endif() - -find_library(CUDNN_LIBRARY - NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} - PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} - PATH_SUFFIXES lib lib64 bin - DOC "CUDNN library." ) - -if(CUDNN_LIBRARY) - SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR}) - set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}) - get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH) - find_path(CUDNN_INCLUDE_DIR - NAMES cudnn.h - HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root} - PATH_SUFFIXES include - DOC "Path to CUDNN include directory." ) +if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) endif() - -if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR) - file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) +if(CUDNN_HEADER_CONTENTS) string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" - CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + _CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" - CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") + _CUDNN_VERSION_MAJOR "${_CUDNN_VERSION_MAJOR}") string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" - CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + _CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" - CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") + _CUDNN_VERSION_MINOR "${_CUDNN_VERSION_MINOR}") string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" - CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + _CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" - CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") - set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}) -endif() - -if(CUDNN_MAJOR_VERSION) - ## Fixing the case where 5.1 does not fit 'exact' 5. - if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR) - if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}") - set(CUDNN_VERSION ${CUDNN_FIND_VERSION}) - endif() + _CUDNN_VERSION_PATCH "${_CUDNN_VERSION_PATCH}") + if(NOT _CUDNN_VERSION_MAJOR) + set(CUDNN_VERSION "?") + else() + set(CUDNN_VERSION "${_CUDNN_VERSION_MAJOR}.${_CUDNN_VERSION_MINOR}.${_CUDNN_VERSION_PATCH}") endif() -else() - # Try to set CUDNN version from config file - set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER}) endif() -find_package_handle_standard_args( - CUDNN - FOUND_VAR CUDNN_FOUND - REQUIRED_VARS CUDNN_LIBRARY - VERSION_VAR CUDNN_VERSION - ) +set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) +set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) +mark_as_advanced(CUDNN_LIBRARY CUDNN_INCLUDE_DIR) -if(CUDNN_FOUND) - set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) - set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) - set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER}) -endif() +find_package_handle_standard_args(CUDNN + REQUIRED_VARS CUDNN_INCLUDE_DIR CUDNN_LIBRARY + VERSION_VAR CUDNN_VERSION +) + +if(WIN32) + set(CUDNN_DLL_DIR ${CUDNN_INCLUDE_DIR}) + list(TRANSFORM CUDNN_DLL_DIR APPEND "/../bin") + find_file(CUDNN_LIBRARY_DLL NAMES cudnn64_${CUDNN_VERSION_MAJOR}.dll PATHS ${CUDNN_DLL_DIR}) +endif() + +if( CUDNN_FOUND AND NOT TARGET CuDNN::CuDNN ) + if( EXISTS "${CUDNN_LIBRARY_DLL}" ) + add_library( CuDNN::CuDNN SHARED IMPORTED ) + set_target_properties( CuDNN::CuDNN PROPERTIES + IMPORTED_LOCATION "${CUDNN_LIBRARY_DLL}" + IMPORTED_IMPLIB "${CUDNN_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" ) + else() + add_library( CuDNN::CuDNN UNKNOWN IMPORTED ) + set_target_properties( CuDNN::CuDNN PROPERTIES + IMPORTED_LOCATION "${CUDNN_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" ) + endif() +endif() diff --git a/src/Detector/darknet/cmake/FindCUDNN_.cmake b/src/Detector/darknet/cmake/FindCUDNN_.cmake new file mode 100644 index 000000000..37388d30b --- /dev/null +++ b/src/Detector/darknet/cmake/FindCUDNN_.cmake @@ -0,0 +1,180 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. +#.rst: +# FindCUDNN +# ------- +# +# Find CUDNN library +# +# Valiables that affect result: +# , , : as usual +# +# : as usual, plus we do find '5.1' version if you wanted '5' +# (not if you wanted '5.0', as usual) +# +# Result variables +# ^^^^^^^^^^^^^^^^ +# +# This module will set the following variables in your project: +# +# ``CUDNN_INCLUDE`` +# where to find cudnn.h. +# ``CUDNN_LIBRARY`` +# the libraries to link against to use CUDNN. +# ``CUDNN_FOUND`` +# If false, do not try to use CUDNN. +# ``CUDNN_VERSION`` +# Version of the CUDNN library we looked for +# +# Exported functions +# ^^^^^^^^^^^^^^^^ +# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir]) +# This function will try to download and install CUDNN. +# CUDNN5 and CUDNN6 are supported. +# +# + +function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir) + message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}") + string(REGEX REPLACE "-rc$" "" version_base "${version}") + set(tar_libdir cuda/lib64) + set(tar_incdir cuda/include) + + if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") + set(url_extension tgz) + if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") + set(url_arch_name linux-x64 ) + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc") + set(url_arch_name linux-ppc64le ) + # TX1 has to be installed via JetPack + endif() + elseif (APPLE) + set(url_extension tgz) + set(tar_libdir cuda/lib) + set(url_arch_name osx-x64) + elseif(WIN32) + set(url_extension zip) + set(tar_bindir cuda/bin) + set(tar_libdir cuda/lib/x64) + if(CMAKE_SYSTEM_VERSION MATCHES "10") + set(url_arch_name windows10-x64) + else() + set(url_arch_name windows7-x64) + endif() + endif() + + # Download and install CUDNN locally if not found on the system + if(url_arch_name) + set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version}) + file(MAKE_DIRECTORY ${download_dir}) + set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension}) + set(base_url http://developer.download.nvidia.com/compute/redist/cudnn) + set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename}) + set(cudnn_file ${download_dir}/${cudnn_filename}) + + if(NOT EXISTS ${cudnn_file}) + message(STATUS "Downloading CUDNN library from NVIDIA...") + file(DOWNLOAD ${cudnn_url} ${cudnn_file} + SHOW_PROGRESS STATUS cudnn_status + ) + execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status) + + if(NOT "${cudnn_status}" MATCHES "0") + message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN") + endif() + endif() + + if(dest_bindir AND tar_bindir) + file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir}) + endif() + + if(dest_incdir) + file(COPY ${download_dir}/${tar_incdir}/ DESTINATION ${dest_incdir}) + endif() + + file(COPY ${download_dir}/${tar_libdir}/ DESTINATION ${dest_libdir} ) + + get_filename_component(dest_dir ${dest_libdir} DIRECTORY) + + set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE) + unset(CUDNN_LIBRARY CACHE) + unset(CUDNN_INCLUDE_DIR CACHE) + + endif(url_arch_name) +endfunction() + +##################################################### + +find_package(PkgConfig) +pkg_check_modules(PC_CUDNN QUIET CUDNN) + +get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH) + +# We use major only in library search as major/minor is not entirely consistent among platforms. +# Also, looking for exact minor version of .so is in general not a good idea. +# More strict enforcement of minor/patch version is done if/when the header file is examined. +if(CUDNN_FIND_VERSION_EXACT) + SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}") + SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR}) +else() + SET(__cudnn_lib_win_name cudnn64) +endif() + +find_library(CUDNN_LIBRARY + NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} + PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} + PATH_SUFFIXES lib lib64 bin + DOC "CUDNN library." ) + +if(CUDNN_LIBRARY) + SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR}) + set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}) + get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH) + find_path(CUDNN_INCLUDE_DIR + NAMES cudnn.h + HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root} + PATH_SUFFIXES include + DOC "Path to CUDNN include directory." ) +endif() + +if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR) + file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" + CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") + string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" + CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" + CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" + CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" + CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") + set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}) +endif() + +if(CUDNN_MAJOR_VERSION) + ## Fixing the case where 5.1 does not fit 'exact' 5. + if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR) + if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}") + set(CUDNN_VERSION ${CUDNN_FIND_VERSION}) + endif() + endif() +else() + # Try to set CUDNN version from config file + set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER}) +endif() + +find_package_handle_standard_args( + CUDNN + FOUND_VAR CUDNN_FOUND + REQUIRED_VARS CUDNN_LIBRARY + VERSION_VAR CUDNN_VERSION + ) + +if(CUDNN_FOUND) + set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) + set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) + set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER}) +endif() diff --git a/src/Detector/darknet/include/darknet.h b/src/Detector/darknet/include/darknet.h index d72027cc4..55ab50d5d 100644 --- a/src/Detector/darknet/include/darknet.h +++ b/src/Detector/darknet/include/darknet.h @@ -1032,6 +1032,11 @@ LIB_API void diounms_sort(detection *dets, int total, int classes, float thresh, // network.h LIB_API float *network_predict(network net, float *input); LIB_API float *network_predict_ptr(network *net, float *input); +#ifdef CUDA_OPENGL_INTEGRATION +LIB_API float *network_predict_gl_texture(network *net, uint32_t texture_id); +#endif // CUDA_OPENGL_INTEGRATION + +LIB_API void set_batch_network(network *net, int b); LIB_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); LIB_API det_num_pair* network_predict_batch(network *net, image im, int batch_size, int w, int h, float thresh, float hier, int *map, int relative, int letter); LIB_API void free_detections(detection *dets, int n); @@ -1047,7 +1052,7 @@ LIB_API void reset_rnn(network *net); LIB_API float *network_predict_image(network *net, image im); LIB_API float *network_predict_image_letterbox(network *net, image im); LIB_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, const int map_points, int letter_box, network *existing_net); -LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, float thresh, float iou_thresh, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path); +LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, float thresh, float iou_thresh, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path, int mAP_epochs); LIB_API void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box, int benchmark_layers); LIB_API int network_width(network *net); diff --git a/src/Detector/darknet/src/activation_kernels.cu b/src/Detector/darknet/src/activation_kernels.cu index 25b5cfdc8..d2dc77199 100644 --- a/src/Detector/darknet/src/activation_kernels.cu +++ b/src/Detector/darknet/src/activation_kernels.cu @@ -185,7 +185,7 @@ __global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) { - binary_gradient_array_kernel << > >(x, dx, n / 2, size, a, y); + binary_gradient_array_kernel <<>>(x, dx, n / 2, size, a, y); CHECK_CUDA(cudaPeekAtLastError()); } __global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) @@ -200,7 +200,7 @@ __global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTI extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) { - binary_activate_array_kernel << > >(x, n / 2, size, a, y); + binary_activate_array_kernel <<>>(x, n / 2, size, a, y); CHECK_CUDA(cudaPeekAtLastError()); } @@ -260,7 +260,7 @@ __global__ void activate_array_mish_kernel(float *x, int n, float *activation_in { int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (i < n) { - const float MISH_THRESHOLD = 20; + //const float MISH_THRESHOLD = 20; float x_val = x[i]; if (activation_input) activation_input[i] = x_val; // store value before activation //output_gpu[i] = x_val * tanh_activate_kernel(logf(1 + expf(x_val))); @@ -495,14 +495,14 @@ extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a) { const int num_blocks = get_number_of_blocks(n, BLOCK); if (a == LINEAR) return; - else if (a == LEAKY || a == REVLEAKY) activate_array_leaky_kernel << > >(x, n); - else if (a == LOGISTIC) activate_array_logistic_kernel << > >(x, n); - else if (a == TANH) activate_array_tanh_kernel << > >(x, n); - else if (a == HARDTAN) activate_array_hardtan_kernel << > >(x, n); - else if (a == RELU) activate_array_relu_kernel << > >(x, n); - else if (a == RELU6) activate_array_relu6_kernel << > >(x, n); - else if (a == SELU) activate_array_selu_kernel << > >(x, n); - else if (a == GELU) activate_array_gelu_kernel << > >(x, n); + else if (a == LEAKY || a == REVLEAKY) activate_array_leaky_kernel <<>>(x, n); + else if (a == LOGISTIC) activate_array_logistic_kernel <<>>(x, n); + else if (a == TANH) activate_array_tanh_kernel <<>>(x, n); + else if (a == HARDTAN) activate_array_hardtan_kernel <<>>(x, n); + else if (a == RELU) activate_array_relu_kernel <<>>(x, n); + else if (a == RELU6) activate_array_relu6_kernel <<>>(x, n); + else if (a == SELU) activate_array_selu_kernel <<>>(x, n); + else if (a == GELU) activate_array_gelu_kernel <<>>(x, n); else activate_array_kernel<<>>(x, n, a); CHECK_CUDA(cudaPeekAtLastError()); @@ -511,21 +511,21 @@ extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a) extern "C" void activate_array_swish_ongpu(float *x, int n, float *output_sigmoid_gpu, float *output_gpu) { const int num_blocks = get_number_of_blocks(n, BLOCK); - activate_array_swish_kernel << > >(x, n, output_sigmoid_gpu, output_gpu); + activate_array_swish_kernel <<>>(x, n, output_sigmoid_gpu, output_gpu); CHECK_CUDA(cudaPeekAtLastError()); } extern "C" void activate_array_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu) { const int num_blocks = get_number_of_blocks(n, BLOCK); - activate_array_mish_kernel << > >(x, n, activation_input_gpu, output_gpu); + activate_array_mish_kernel <<>>(x, n, activation_input_gpu, output_gpu); CHECK_CUDA(cudaPeekAtLastError()); } extern "C" void activate_array_hard_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu) { const int num_blocks = get_number_of_blocks(n, BLOCK); - activate_array_hard_mish_kernel << > >(x, n, activation_input_gpu, output_gpu); + activate_array_hard_mish_kernel <<>>(x, n, activation_input_gpu, output_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -533,22 +533,21 @@ extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta { const int num_blocks = get_number_of_blocks(n, BLOCK); if (a == LINEAR) return; - else if (a == LEAKY) gradient_array_leaky_kernel << > >(x, n, delta); - else if (a == REVLEAKY) gradient_array_revleaky_kernel << > >(x, n, delta); - else if (a == LOGISTIC) gradient_array_logistic_kernel << > >(x, n, delta); - else if (a == TANH) gradient_array_tanh_kernel << > >(x, n, delta); - else if (a == HARDTAN) gradient_array_hardtan_kernel << > >(x, n, delta); - else if (a == RELU) gradient_array_relu_kernel << > >(x, n, delta); - else if (a == RELU6) gradient_array_relu6_kernel << > >(x, n, delta); - //else if (a == NORM_CHAN) gradient_array_relu_kernel << > >(x, n, delta); + else if (a == LEAKY) gradient_array_leaky_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta); + else if (a == REVLEAKY) gradient_array_revleaky_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta); + else if (a == LOGISTIC) gradient_array_logistic_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta); + else if (a == TANH) gradient_array_tanh_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta); + else if (a == HARDTAN) gradient_array_hardtan_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta); + else if (a == RELU) gradient_array_relu_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta); + else if (a == RELU6) gradient_array_relu6_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta); + //else if (a == NORM_CHAN) gradient_array_relu_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n, delta); else if (a == NORM_CHAN_SOFTMAX || a == NORM_CHAN) { - printf(" Error: should be used custom NORM_CHAN_SOFTMAX-function for gradient \n"); - exit(0); + error("Error: should be used custom NORM_CHAN_SOFTMAX-function for gradient", DARKNET_LOC); } - else if (a == SELU) gradient_array_selu_kernel << > >(x, n, delta); - else if (a == GELU) gradient_array_gelu_kernel << > >(x, n, delta); + else if (a == SELU) gradient_array_selu_kernel <<>>(x, n, delta); + else if (a == GELU) gradient_array_gelu_kernel <<>>(x, n, delta); else - gradient_array_kernel << > > (x, n, a, delta); + gradient_array_kernel <<>> (x, n, a, delta); CHECK_CUDA(cudaPeekAtLastError()); } @@ -556,21 +555,21 @@ extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta extern "C" void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu, float *delta) { const int num_blocks = get_number_of_blocks(n, BLOCK); - gradient_array_swish_kernel << > > (x, n, sigmoid_gpu, delta); + gradient_array_swish_kernel <<>> (x, n, sigmoid_gpu, delta); CHECK_CUDA(cudaPeekAtLastError()); } extern "C" void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta) { const int num_blocks = get_number_of_blocks(n, BLOCK); - gradient_array_mish_kernel << > > (n, activation_input_gpu, delta); + gradient_array_mish_kernel <<>> (n, activation_input_gpu, delta); CHECK_CUDA(cudaPeekAtLastError()); } extern "C" void gradient_array_hard_mish_ongpu(int n, float *activation_input_gpu, float *delta) { const int num_blocks = get_number_of_blocks(n, BLOCK); - gradient_array_hard_mish_kernel << > > (n, activation_input_gpu, delta); + gradient_array_hard_mish_kernel <<>> (n, activation_input_gpu, delta); CHECK_CUDA(cudaPeekAtLastError()); } @@ -607,7 +606,7 @@ extern "C" void activate_array_normalize_channels_ongpu(float *x, int n, int bat const int num_blocks = get_number_of_blocks(size, BLOCK); - activate_array_normalize_channels_kernel << > > (x, size, batch, channels, wh_step, output_gpu); + activate_array_normalize_channels_kernel <<>> (x, size, batch, channels, wh_step, output_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -655,7 +654,7 @@ extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n, const int num_blocks = get_number_of_blocks(size, BLOCK); - activate_array_normalize_channels_softmax_kernel << > > (x, size, batch, channels, wh_step, output_gpu, use_max_val); + activate_array_normalize_channels_softmax_kernel <<>> (x, size, batch, channels, wh_step, output_gpu, use_max_val); CHECK_CUDA(cudaPeekAtLastError()); } @@ -698,7 +697,7 @@ extern "C" void gradient_array_normalize_channels_softmax_ongpu(float *output_gp const int num_blocks = get_number_of_blocks(size, BLOCK); - gradient_array_normalize_channels_softmax_kernel << > > (output_gpu, size, batch, channels, wh_step, delta_gpu); + gradient_array_normalize_channels_softmax_kernel <<>> (output_gpu, size, batch, channels, wh_step, delta_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -741,6 +740,6 @@ extern "C" void gradient_array_normalize_channels_ongpu(float *output_gpu, int n const int num_blocks = get_number_of_blocks(size, BLOCK); - gradient_array_normalize_channels_kernel << > > (output_gpu, size, batch, channels, wh_step, delta_gpu); + gradient_array_normalize_channels_kernel <<>> (output_gpu, size, batch, channels, wh_step, delta_gpu); CHECK_CUDA(cudaPeekAtLastError()); } diff --git a/src/Detector/darknet/src/activations.c b/src/Detector/darknet/src/activations.c index 87ba1d9e3..9e8a49f72 100644 --- a/src/Detector/darknet/src/activations.c +++ b/src/Detector/darknet/src/activations.c @@ -323,9 +323,7 @@ float gradient(float x, ACTIVATION a) case NORM_CHAN_SOFTMAX_MAXVAL: //... case NORM_CHAN_SOFTMAX: - printf(" Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient \n"); - exit(0); - return 0; + error("Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient", DARKNET_LOC); case ELU: return elu_gradient(x); case SELU: diff --git a/src/Detector/darknet/src/art.c b/src/Detector/darknet/src/art.c index 5fbcfcead..748e19038 100644 --- a/src/Detector/darknet/src/art.c +++ b/src/Detector/darknet/src/art.c @@ -40,8 +40,7 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index) float *p = network_predict(net, in_s.data); - printf("\033[2J"); - printf("\033[1;1H"); + printf("\033[H\033[J"); float score = 0; for(i = 0; i < n; ++i){ @@ -51,7 +50,7 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index) score = score; printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); printf("["); - int upper = 30; + int upper = 30; for(i = 0; i < upper; ++i){ printf("%c", ((i+.5) < score*upper) ? 219 : ' '); } diff --git a/src/Detector/darknet/src/blas.c b/src/Detector/darknet/src/blas.c index 884d24af9..122bca0ce 100644 --- a/src/Detector/darknet/src/blas.c +++ b/src/Detector/darknet/src/blas.c @@ -342,32 +342,32 @@ void fill_cpu(int N, float ALPHA, float *X, int INCX) } } -void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUTPUT) { int i, j; int index = 0; for(j = 0; j < B; ++j) { for(i = 0; i < NX; ++i){ - if(X) X[j*NX + i] += OUT[index]; + if(X) X[j*NX + i] += OUTPUT[index]; ++index; } for(i = 0; i < NY; ++i){ - if(Y) Y[j*NY + i] += OUT[index]; + if(Y) Y[j*NY + i] += OUTPUT[index]; ++index; } } } -void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) +void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUTPUT) { int i, j; int index = 0; for(j = 0; j < B; ++j) { for(i = 0; i < NX; ++i){ - OUT[index++] = X[j*NX + i]; + OUTPUT[index++] = X[j*NX + i]; } for(i = 0; i < NY; ++i){ - OUT[index++] = Y[j*NY + i]; + OUTPUT[index++] = Y[j*NY + i]; } } } @@ -511,7 +511,7 @@ void fix_nan_and_inf_cpu(float *input, size_t size) int i; for (i = 0; i < size; ++i) { float val = input[i]; - if (i > 0 && (isnan(val) || isinf(val))) + if (isnan(val) || isinf(val)) input[i] = 1.0f / i; // pseudo random value } } @@ -594,8 +594,8 @@ float find_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_ if (contrast_p[z].i == i && contrast_p[z].j == j) break; } if (z == contrast_p_size) { - printf(" Error: find_sim(): sim isn't found: i = %d, j = %d, z = %d \n", i, j, z); - getchar(); + printf(" Error: find_sim(): sim isn't found: i = %zu, j = %zu, z = %zu \n", i, j, z); + error("Error!", DARKNET_LOC); } return contrast_p[z].sim; @@ -608,8 +608,8 @@ float find_P_constrastive(size_t i, size_t j, contrastive_params *contrast_p, in if (contrast_p[z].i == i && contrast_p[z].j == j) break; } if (z == contrast_p_size) { - printf(" Error: find_P_constrastive(): P isn't found: i = %d, j = %d, z = %d \n", i, j, z); - getchar(); + printf(" Error: find_P_constrastive(): P isn't found: i = %zu, j = %zu, z = %zu \n", i, j, z); + error("Error!", DARKNET_LOC); } return contrast_p[z].P; @@ -648,8 +648,8 @@ float P_constrastive_f_det(size_t il, int *labels, float **z, unsigned int featu float P_constrastive_f(size_t i, size_t l, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size) { if (i == l) { - fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %d, l = %d \n", i, l); - getchar(); + fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %zu, l = %zu \n", i, l); + error("Error!", DARKNET_LOC); } const float sim = find_sim(i, l, contrast_p, contrast_p_size); // cosine_similarity(z[i], z[l], feature_size); @@ -686,8 +686,7 @@ void grad_contrastive_loss_positive_f(size_t i, int *class_ids, int *labels, siz if (N == 0 || temperature == 0 || vec_len == 0) { fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f, labels[i] = %d \n", N, temperature, vec_len, labels[i]); - getchar(); - return; + error("Error!", DARKNET_LOC); } const float mult = 1 / ((N - 1) * temperature * vec_len); @@ -732,8 +731,7 @@ void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, siz if (N == 0 || temperature == 0 || vec_len == 0) { fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f, labels[i] = %d \n", N, temperature, vec_len, labels[i]); - getchar(); - return; + error("Error!", DARKNET_LOC); } const float mult = 1 / ((N - 1) * temperature * vec_len); @@ -782,8 +780,8 @@ void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, siz float P_constrastive(size_t i, size_t l, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *exp_cos_sim) { if (i == l) { - fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %d, l = %d \n", i, l); - getchar(); + fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %zu, l = %zu \n", i, l); + error("Error!", DARKNET_LOC); } //const float sim = cos_sim[i*num_of_samples + l]; // cosine_similarity(z[i], z[l], feature_size); @@ -820,7 +818,7 @@ void grad_contrastive_loss_positive(size_t i, int *labels, size_t num_of_samples } if (N == 0 || temperature == 0 || vec_len == 0) { fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len); - getchar(); + error("Error!", DARKNET_LOC); } const float mult = 1 / ((N - 1) * temperature * vec_len); @@ -860,7 +858,7 @@ void grad_contrastive_loss_negative(size_t i, int *labels, size_t num_of_samples } if (N == 0 || temperature == 0 || vec_len == 0) { fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len); - getchar(); + error("Error!", DARKNET_LOC); } const float mult = 1 / ((N - 1) * temperature * vec_len); diff --git a/src/Detector/darknet/src/blas_kernels.cu b/src/Detector/darknet/src/blas_kernels.cu index 85c55adfb..3bc0d90b5 100644 --- a/src/Detector/darknet/src/blas_kernels.cu +++ b/src/Detector/darknet/src/blas_kernels.cu @@ -34,7 +34,7 @@ void compare_2_arrays_gpu(float *one, float *two, int size) { const int num_blocks = get_number_of_blocks(size, BLOCK); - compare_2_arrays_kernel << > >(one, two, size); + compare_2_arrays_kernel <<>>(one, two, size); CHECK_CUDA(cudaPeekAtLastError()); CHECK_CUDA(cudaDeviceSynchronize()); } @@ -53,7 +53,7 @@ void mean_array_gpu(float *src, int size, float alpha, float *avg) { const int num_blocks = get_number_of_blocks(size, BLOCK); - mean_array_kernel << > >(src, size, alpha, avg); + mean_array_kernel <<>>(src, size, alpha, avg); CHECK_CUDA(cudaPeekAtLastError()); } @@ -72,7 +72,7 @@ void scale_bias_gpu(float *output, float *scale, int batch, int filters, int spa const int current_size = batch * filters * spatial; const int num_blocks = get_number_of_blocks(current_size, BLOCK); - scale_bias_kernel << > >(output, scale, batch, filters, spatial, current_size); + scale_bias_kernel <<>>(output, scale, batch, filters, spatial, current_size); CHECK_CUDA(cudaPeekAtLastError()); } @@ -117,7 +117,7 @@ void add_bias_gpu(float *output, float *biases, int batch, int filters, int spat const int current_size = batch * filters * spatial; const int num_blocks = get_number_of_blocks(current_size, BLOCK); - add_bias_kernel << > >(output, biases, batch, filters, spatial, current_size); + add_bias_kernel <<>>(output, biases, batch, filters, spatial, current_size); CHECK_CUDA(cudaPeekAtLastError()); } @@ -202,7 +202,7 @@ __global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) { - adam_kernel << > >(n, x, m, v, B1, B2, rate, eps, t); + adam_kernel <<>>(n, x, m, v, B1, B2, rate, eps, t); CHECK_CUDA(cudaPeekAtLastError()); } @@ -235,7 +235,7 @@ extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, const int current_size = batch * filters * spatial; const int num_blocks = get_number_of_blocks(current_size, BLOCK); - normalize_kernel << > >(current_size, x, mean, variance, batch, filters, spatial); + normalize_kernel <<>>(current_size, x, mean, variance, batch, filters, spatial); CHECK_CUDA(cudaPeekAtLastError()); } @@ -450,7 +450,7 @@ __global__ void constrain_weight_updates_kernel(int N, float coef, float *weight extern "C" void constrain_weight_updates_ongpu(int N, float coef, float *weights_gpu, float *weight_updates_gpu) { - constrain_weight_updates_kernel << > >(N, coef, weights_gpu, weight_updates_gpu); + constrain_weight_updates_kernel <<>>(N, coef, weights_gpu, weight_updates_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -512,8 +512,8 @@ __global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) __global__ void mask_kernel_new_api(int n, float *x, float mask_num, float *mask, float val) { - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n && mask[i] == mask_num) x[i] = val; + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i < n && mask[i] == mask_num) x[i] = val; } __global__ void mask_kernel(int n, float *x, float mask_num, float *mask) @@ -573,7 +573,7 @@ __global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean) { - fast_mean_kernel << > >(x, batch, filters, spatial, mean); + fast_mean_kernel <<>>(x, batch, filters, spatial, mean); CHECK_CUDA(cudaPeekAtLastError()); } @@ -669,7 +669,7 @@ __global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int f extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance, const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon) { - fast_v_cbn_kernel << > >(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon); + fast_v_cbn_kernel <<>>(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon); CHECK_CUDA(cudaPeekAtLastError()); } @@ -683,7 +683,7 @@ __global__ void inverse_variance_kernel(int size, float *src, float *dst, float extern "C" void inverse_variance_ongpu(int size, float *src, float *dst, float epsilon) { const int num_blocks = size / BLOCK + 1; - inverse_variance_kernel << > >(size, src, dst, epsilon); + inverse_variance_kernel <<>>(size, src, dst, epsilon); CHECK_CUDA(cudaPeekAtLastError()); } @@ -708,7 +708,7 @@ extern "C" void normalize_scale_bias_gpu(float *x, float *mean, float *variance, const int current_size = batch * filters * spatial; const int num_blocks = get_number_of_blocks(current_size, BLOCK); - normalize_scale_bias_kernel << > >(current_size, x, mean, variance, scales, biases, batch, filters, spatial, inverse_variance, epsilon); + normalize_scale_bias_kernel <<>>(current_size, x, mean, variance, scales, biases, batch, filters, spatial, inverse_variance, epsilon); CHECK_CUDA(cudaPeekAtLastError()); } @@ -749,7 +749,7 @@ extern "C" void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY) extern "C" void simple_copy_ongpu(int size, float *src, float *dst) { const int num_blocks = size / BLOCK + 1; - simple_copy_kernel << > >(size, src, dst); + simple_copy_kernel <<>>(size, src, dst); CHECK_CUDA(cudaPeekAtLastError()); } @@ -804,7 +804,7 @@ extern "C" void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride extern "C" void mask_gpu_new_api(int N, float * X, float mask_num, float * mask, float val) { - mask_kernel_new_api <<>>(N, X, mask_num, mask, val); + mask_kernel_new_api <<>>(N, X, mask_num, mask, val); CHECK_CUDA(cudaPeekAtLastError()); } @@ -828,7 +828,7 @@ extern "C" void constrain_ongpu(int N, float ALPHA, float * X, int INCX) extern "C" void constrain_min_max_ongpu(int N, float MIN, float MAX, float * X, int INCX) { - constrain_min_max_kernel << > >(N, MIN, MAX, X, INCX); + constrain_min_max_kernel <<>>(N, MIN, MAX, X, INCX); CHECK_CUDA(cudaPeekAtLastError()); } @@ -841,7 +841,7 @@ extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX) extern "C" void scal_add_ongpu(int N, float ALPHA, float BETA, float * X, int INCX) { - scal_add_kernel << > >(N, ALPHA, BETA, X, INCX); + scal_add_kernel <<>>(N, ALPHA, BETA, X, INCX); CHECK_CUDA(cudaPeekAtLastError()); } @@ -855,7 +855,7 @@ extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX) { //fill_kernel<<>>(N, ALPHA, X, INCX); //CHECK_CUDA(cudaPeekAtLastError()); - fill_kernel << > >(N, ALPHA, X, INCX); + fill_kernel <<>>(N, ALPHA, X, INCX); CHECK_CUDA(cudaPeekAtLastError()); } @@ -884,7 +884,7 @@ extern "C" void gradient_centralization_gpu(int w, int h, int c, int f, float *i const int f_size = c * h * w; if (f_size % WARP_SIZE == 0) { - gradient_centralization_kernel << > > (f, f_size, in); + gradient_centralization_kernel <<>> (f, f_size, in); CHECK_CUDA(cudaPeekAtLastError()); } } @@ -1003,10 +1003,10 @@ extern "C" void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int * //printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n); int size = batch * src_outputs; if (nweights == 0 && n == 1) { - shortcut_singlelayer_simple_kernel << > > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization); + shortcut_singlelayer_simple_kernel <<>> (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization); } else { - shortcut_multilayer_kernel << > > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization); + shortcut_multilayer_kernel <<>> (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization); } CHECK_CUDA(cudaPeekAtLastError()); } @@ -1083,7 +1083,6 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i int add_outputs = outputs_of_layers_gpu[i]; if (src_i < add_outputs) { int add_index = add_outputs*src_b + src_i; - int out_index = id; float *layer_delta = layers_delta_gpu[i]; if (weights_gpu) { @@ -1126,14 +1125,14 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i extern "C" void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalization) { - const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) - int step = 0; - if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1 + //const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) + //int step = 0; + //if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1 //printf(" nweights = %d, n = %d, layer_step = %d, step = %d \n", nweights, n, layer_step, step); //printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n); int size = batch * src_outputs; - backward_shortcut_multilayer_kernel << > > (size, src_outputs, batch, n, outputs_of_layers_gpu, + backward_shortcut_multilayer_kernel <<>> (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_delta_gpu, delta_out, delta_in, weights_gpu, weight_updates_gpu, nweights, in, layers_output_gpu, weights_normalization); CHECK_CUDA(cudaPeekAtLastError()); } @@ -1202,7 +1201,7 @@ extern "C" void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1, { if (w1 == w2 && h1 == h2 && c1 == c2) { int size = batch * w1 * h1 * c1; - simple_input_shortcut_kernel << > >(in, size, add, out); + simple_input_shortcut_kernel <<>>(in, size, add, out); CHECK_CUDA(cudaPeekAtLastError()); return; } @@ -1219,9 +1218,9 @@ extern "C" void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1, if (sample < 1) sample = 1; int size = batch * minw * minh * minc; - //input_shortcut_kernel << > >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); + //input_shortcut_kernel <<>>(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); simple_copy_ongpu(w2 * h2 * c2 * batch, in, out); - shortcut_kernel << > >(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); + shortcut_kernel <<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); CHECK_CUDA(cudaPeekAtLastError()); } @@ -1250,18 +1249,18 @@ extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, fl __global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) { - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n) { - float t = truth[i]; - float p = pred[i]; - error[i] = (t) ? -log(p) : 0; - delta[i] = t - p; - } + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i < n) { + float t = truth[i]; + float p = pred[i]; + error[i] = (t) ? -log(p) : 0; + delta[i] = t - p; + } } extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) { - softmax_x_ent_kernel << > >(n, pred, truth, delta, error); + softmax_x_ent_kernel <<>>(n, pred, truth, delta, error); CHECK_CUDA(cudaPeekAtLastError()); } @@ -1364,35 +1363,35 @@ extern "C" void softmax_gpu(float *input, int n, int offset, int groups, float t __device__ void softmax_device_new_api(float *input, int n, float temp, int stride, float *output) { - int i; - float sum = 0; - float largest = -INFINITY; - for (i = 0; i < n; ++i) { - int val = input[i*stride]; - largest = (val>largest) ? val : largest; - } - for (i = 0; i < n; ++i) { - float e = expf(input[i*stride] / temp - largest / temp); - sum += e; - output[i*stride] = e; - } - for (i = 0; i < n; ++i) { - output[i*stride] /= sum; - } + int i; + float sum = 0; + float largest = -INFINITY; + for (i = 0; i < n; ++i) { + int val = input[i*stride]; + largest = (val>largest) ? val : largest; + } + for (i = 0; i < n; ++i) { + float e = expf(input[i*stride] / temp - largest / temp); + sum += e; + output[i*stride] = e; + } + for (i = 0; i < n; ++i) { + output[i*stride] /= sum; + } } __global__ void softmax_kernel_new_api(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) { - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= batch*groups) return; - int b = id / groups; - int g = id % groups; - softmax_device_new_api(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= batch*groups) return; + int b = id / groups; + int g = id % groups; + softmax_device_new_api(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); } extern "C" void softmax_gpu_new_api(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) { - softmax_kernel_new_api << > >(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); + softmax_kernel_new_api <<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); CHECK_CUDA(cudaPeekAtLastError()); } @@ -1424,40 +1423,40 @@ __global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int bat extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) { size_t size = w*h*c*batch*stride*stride; - upsample_kernel << > >(size, in, w, h, c, batch, stride, forward, scale, out); + upsample_kernel <<>>(size, in, w, h, c, batch, stride, forward, scale, out); CHECK_CUDA(cudaPeekAtLastError()); } __global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset) { - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= spatial*batch*groups) return; - int s = id % spatial; - id = id / spatial; - int g = id % groups; - int b = id / groups; - int goff = group_offset[g] * spatial; - int boff = b*stride; - softmax_device_new_api(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s); + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= spatial*batch*groups) return; + int s = id % spatial; + id = id / spatial; + int g = id % groups; + int b = id / groups; + int goff = group_offset[g] * spatial; + int boff = b*stride; + softmax_device_new_api(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s); } extern "C" void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier) { - int *tree_groups_size = cuda_make_int_array_new_api(hier.group_size, hier.groups); - int *tree_groups_offset = cuda_make_int_array_new_api(hier.group_offset, hier.groups); - /* - static int *tree_groups_size = 0; - static int *tree_groups_offset = 0; - if(!tree_groups_size){ - tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); - tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); - } - */ - int num = spatial*batch*hier.groups; - softmax_tree_kernel <<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); + int *tree_groups_size = cuda_make_int_array_new_api(hier.group_size, hier.groups); + int *tree_groups_offset = cuda_make_int_array_new_api(hier.group_offset, hier.groups); + /* + static int *tree_groups_size = 0; + static int *tree_groups_offset = 0; + if(!tree_groups_size){ + tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); + tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); + } + */ + int num = spatial*batch*hier.groups; + softmax_tree_kernel <<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); CHECK_CUDA(cudaPeekAtLastError()); - cuda_free((float *)tree_groups_size); - cuda_free((float *)tree_groups_offset); + cuda_free((float *)tree_groups_size); + cuda_free((float *)tree_groups_offset); } @@ -1476,7 +1475,7 @@ extern "C" void fix_nan_and_inf(float *input, size_t size) { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - fix_nan_and_inf_kernel << > >(input, size); + fix_nan_and_inf_kernel <<>>(input, size); CHECK_CUDA(cudaPeekAtLastError()); //CHECK_CUDA(cudaDeviceSynchronize()); } @@ -1497,7 +1496,7 @@ extern "C" void reset_nan_and_inf(float *input, size_t size) { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - reset_nan_and_inf_kernel << > >(input, size); + reset_nan_and_inf_kernel <<>>(input, size); CHECK_CUDA(cudaPeekAtLastError()); //CHECK_CUDA(cudaDeviceSynchronize()); } @@ -1522,7 +1521,7 @@ extern "C" int is_nan_or_inf(float *input, size_t size) const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - is_nan_or_inf_kernel << > >(input, size, pinned_return); + is_nan_or_inf_kernel <<>>(input, size, pinned_return); CHECK_CUDA(cudaDeviceSynchronize()); int ret_val = *pinned_return; @@ -1550,10 +1549,9 @@ extern "C" void add_3_arrays_activate(float *a1, float *a2, float *a3, size_t si const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); if (!(a == LOGISTIC || a == TANH || a == LEAKY || a == LINEAR)) { - printf(" add_3_arrays_activate() doesn't support activation %d, it supports only LOGISTIC and TANH \n", a); - exit(EXIT_FAILURE); + error("Error: add_3_arrays_activate() supports only LOGISTIC and TANH", DARKNET_LOC); } - add_3_arrays_activate_kernel << > >(a1, a2, a3, size, a, dst); + add_3_arrays_activate_kernel <<>>(a1, a2, a3, size, a, dst); } @@ -1569,7 +1567,7 @@ extern "C" void sum_of_mults(float *a1, float *a2, float *b1, float *b2, size_t { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - sum_of_mults_kernel << > >(a1, a2, b1, b2, size, dst); + sum_of_mults_kernel <<>>(a1, a2, b1, b2, size, dst); } @@ -1589,10 +1587,9 @@ extern "C" void activate_and_mult(float *a1, float *a2, size_t size, ACTIVATION const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); if (!(a == TANH || a == LEAKY || a == LINEAR)) { - printf(" activat_and_mult() doesn't support activation %d, it supports only TANH \n", a); - exit(EXIT_FAILURE); + error("Error: activat_and_mult() supports only TANH", DARKNET_LOC); } - activate_and_mult_kernel << > >(a1, a2, size, a, dst); + activate_and_mult_kernel <<>>(a1, a2, size, a, dst); } @@ -1616,7 +1613,7 @@ extern "C" void scale_channels_gpu(float *in_w_h_c, int size, int channel_size, { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - scale_channels_kernel << > >(in_w_h_c, size, channel_size, batch_size, scale_wh, scales_c, out); + scale_channels_kernel <<>>(in_w_h_c, size, channel_size, batch_size, scale_wh, scales_c, out); CHECK_CUDA(cudaPeekAtLastError()); } @@ -1673,7 +1670,7 @@ extern "C" void backward_scale_channels_gpu(float *in_w_h_c_delta, int size, int { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - backward_scale_channels_kernel << > > (in_w_h_c_delta, size, channel_size, batch_size, scale_wh, + backward_scale_channels_kernel <<>> (in_w_h_c_delta, size, channel_size, batch_size, scale_wh, in_scales_c, out_from_delta, in_from_output, out_state_delta); @@ -1693,7 +1690,7 @@ extern "C" void sam_gpu(float *in_w_h_c, int size, int channel_size, float *scal { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - sam_kernel << > >(in_w_h_c, size, channel_size, scales_c, out); + sam_kernel <<>>(in_w_h_c, size, channel_size, scales_c, out); CHECK_CUDA(cudaPeekAtLastError()); } @@ -1718,7 +1715,7 @@ extern "C" void backward_sam_gpu(float *in_w_h_c_delta, int size, int channel_si { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - backward_sam_kernel << > > (in_w_h_c_delta, size, channel_size, + backward_sam_kernel <<>> (in_w_h_c_delta, size, channel_size, in_scales_c, out_from_delta, in_from_output, out_state_delta); @@ -1732,8 +1729,8 @@ __global__ void smooth_rotate_weights_kernel(const float *src_weight_gpu, float const int kernel_area = kernel_size * kernel_size; const int i = index * kernel_area; - const int stage_step = (nweights / kernel_area) / 4; // 4 stages - const int stage_id = index / stage_step; + //const int stage_step = (nweights / kernel_area) / 4; // 4 stages + //const int stage_id = index / stage_step; // nweights = (c / groups) * n * size * size; // kernel_area = size*size @@ -1804,7 +1801,7 @@ extern "C" void smooth_rotate_weights_gpu(const float *src_weight_gpu, float *we const int kernel_area = size*size; const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - smooth_rotate_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); + smooth_rotate_weights_kernel <<>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); CHECK_CUDA(cudaPeekAtLastError()); } @@ -1902,7 +1899,7 @@ extern "C" void stretch_weights_gpu(const float *src_weight_gpu, float *weight_d const int kernel_area = size*size; const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - stretch_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, scale, reverse); + stretch_weights_kernel <<>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, scale, reverse); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2008,7 +2005,7 @@ extern "C" void sway_and_flip_weights_gpu(const float *src_weight_gpu, float *we const int kernel_area = size*size; const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - sway_and_flip_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); + sway_and_flip_weights_kernel <<>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2091,7 +2088,7 @@ extern "C" void rotate_weights_gpu(const float *src_weight_gpu, float *weight_de const int kernel_area = size*size; const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - rotate_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, reverse); + rotate_weights_kernel <<>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, reverse); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2258,7 +2255,7 @@ extern "C" void stretch_sway_flip_weights_gpu(const float *src_weight_gpu, float const int kernel_area = size*size; const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - stretch_sway_flip_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); + stretch_sway_flip_weights_kernel <<>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2285,7 +2282,7 @@ extern "C" void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu const int current_size = size / groups; const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(current_size, block_size); - reduce_and_expand_array_kernel << > > (src_gpu, dst_gpu, current_size, groups); + reduce_and_expand_array_kernel <<>> (src_gpu, dst_gpu, current_size, groups); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2308,7 +2305,7 @@ extern "C" void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, const int current_size = size / groups; const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(current_size, block_size); - expand_array_kernel << > > (src_gpu, dst_gpu, current_size, groups); + expand_array_kernel <<>> (src_gpu, dst_gpu, current_size, groups); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2340,7 +2337,7 @@ extern "C" void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - mult_inverse_array_kernel << > > (src_gpu, dst_gpu, size, eps, divider, clip, abs_add); + mult_inverse_array_kernel <<>> (src_gpu, dst_gpu, size, eps, divider, clip, abs_add); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2385,7 +2382,7 @@ extern "C" void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(contrast_p_size, block_size); - P_constrastive_f_det_kernel << > > (labels, feature_size, temperature, contrast_p, contrast_p_size); + P_constrastive_f_det_kernel <<>> (labels, feature_size, temperature, contrast_p, contrast_p_size); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2432,7 +2429,7 @@ extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); - coord_conv_kernel << > > (dst, w, h, chan, b, type); + coord_conv_kernel <<>> (dst, w, h, chan, b, type); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2449,7 +2446,7 @@ __global__ void forward_implicit_kernel(int size, int batch, int nweights, float extern "C" void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu) { int size = batch * nweights; - forward_implicit_kernel << > > (size, batch, nweights, weight_gpu, output_gpu); + forward_implicit_kernel <<>> (size, batch, nweights, weight_gpu, output_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2468,6 +2465,6 @@ __global__ void backward_implicit_kernel(int size, int batch, int nweights, floa extern "C" void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float *delta_gpu) { int size = nweights; - backward_implicit_kernel << > > (size, batch, nweights, weight_updates_gpu, delta_gpu); + backward_implicit_kernel <<>> (size, batch, nweights, weight_updates_gpu, delta_gpu); CHECK_CUDA(cudaPeekAtLastError()); } diff --git a/src/Detector/darknet/src/box.c b/src/Detector/darknet/src/box.c index 201ab1334..0ad1263e5 100644 --- a/src/Detector/darknet/src/box.c +++ b/src/Detector/darknet/src/box.c @@ -345,12 +345,12 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { p_dl += ((giou_C * dU_wrt_l) - (U * dC_wrt_l)) / (giou_C * giou_C); p_dr += ((giou_C * dU_wrt_r) - (U * dC_wrt_r)) / (giou_C * giou_C); } - if (Iw<=0||Ih<=0) { - p_dt = ((giou_C * dU_wrt_t) - (U * dC_wrt_t)) / (giou_C * giou_C); + if (Iw<=0||Ih<=0) { + p_dt = ((giou_C * dU_wrt_t) - (U * dC_wrt_t)) / (giou_C * giou_C); p_db = ((giou_C * dU_wrt_b) - (U * dC_wrt_b)) / (giou_C * giou_C); p_dl = ((giou_C * dU_wrt_l) - (U * dC_wrt_l)) / (giou_C * giou_C); p_dr = ((giou_C * dU_wrt_r) - (U * dC_wrt_r)) / (giou_C * giou_C); - } + } } float Ct = fmin(pred.y - pred.h / 2,truth.y - truth.h / 2); @@ -418,21 +418,21 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { p_dw += (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C); p_dh += (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C); } - if (Iw<=0||Ih<=0){ + if (Iw<=0||Ih<=0){ p_dx = (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); p_dy = (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); p_dw = (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C); p_dh = (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C); } } - //The following codes are calculating the gradient of ciou. + //The following codes are calculating the gradient of ciou. if (iou_loss == CIOU) { - float ar_gt = truth.w / truth.h; + float ar_gt = truth.w / truth.h; float ar_pred = pred.w / pred.h; float ar_loss = 4 / (M_PI * M_PI) * (atan(ar_gt) - atan(ar_pred)) * (atan(ar_gt) - atan(ar_pred)); - float alpha = ar_loss / (1 - I/U + ar_loss + 0.000001); - float ar_dw=8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.h; + float alpha = ar_loss / (1 - I/U + ar_loss + 0.000001); + float ar_dw=8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.h; float ar_dh=-8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.w; if (C > 0) { // dar* @@ -441,7 +441,7 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { p_dw += (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C) + alpha * ar_dw; p_dh += (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C) + alpha * ar_dh; } - if (Iw<=0||Ih<=0){ + if (Iw<=0||Ih<=0){ p_dx = (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); p_dy = (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); p_dw = (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C) + alpha * ar_dw; diff --git a/src/Detector/darknet/src/box.h b/src/Detector/darknet/src/box.h index 4b720653c..f72e26c46 100644 --- a/src/Detector/darknet/src/box.h +++ b/src/Detector/darknet/src/box.h @@ -12,19 +12,19 @@ typedef struct{ } dbox; //typedef struct detection { -// box bbox; -// int classes; -// float *prob; -// float *mask; -// float objectness; -// int sort_class; +// box bbox; +// int classes; +// float *prob; +// float *mask; +// float objectness; +// int sort_class; //} detection; typedef struct detection_with_class { - detection det; - // The most probable class id: the best class index in this->prob. - // Is filled temporary when processing results, otherwise not initialized - int best_class; + detection det; + // The most probable class id: the best class index in this->prob. + // Is filled temporary when processing results, otherwise not initialized + int best_class; } detection_with_class; #ifdef __cplusplus diff --git a/src/Detector/darknet/src/captcha.c b/src/Detector/darknet/src/captcha.c index 0cc159152..5fd565d04 100644 --- a/src/Detector/darknet/src/captcha.c +++ b/src/Detector/darknet/src/captcha.c @@ -85,7 +85,7 @@ void train_captcha(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%d: %f, %f avg, %lf seconds, %" PRIu64 " images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100==0){ char buff[256]; diff --git a/src/Detector/darknet/src/cifar.c b/src/Detector/darknet/src/cifar.c index 1b87cb5f0..1ef221e87 100644 --- a/src/Detector/darknet/src/cifar.c +++ b/src/Detector/darknet/src/cifar.c @@ -29,7 +29,7 @@ void train_cifar(char *cfgfile, char *weightfile) float loss = train_network_sgd(net, train, 1); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; @@ -85,7 +85,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile) float loss = train_network_sgd(net, train, 1); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; diff --git a/src/Detector/darknet/src/classifier.c b/src/Detector/darknet/src/classifier.c index e42b1ea3a..f01303327 100644 --- a/src/Detector/darknet/src/classifier.c +++ b/src/Detector/darknet/src/classifier.c @@ -74,7 +74,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) { printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n", l.outputs, classes); - getchar(); + error("Error!", DARKNET_LOC); } char **labels = get_labels(label_list); @@ -208,7 +208,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, if (avg_time < 0) avg_time = time_remaining; else avg_time = alpha_time * time_remaining + (1 - alpha_time) * avg_time; start = what_time_is_it_now(); - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images, %f hours left\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen, avg_time); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images, %f hours left\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen, avg_time); #ifdef OPENCV if (net.contrastive) { float cur_con_acc = -1; @@ -845,7 +845,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) { printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n", l.outputs, classes); - getchar(); + error("Error!", DARKNET_LOC); } if (top == 0) top = option_find_int(options, "top", 1); if (top > classes) top = classes; @@ -1129,11 +1129,8 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i sprintf(buff, "tmp/threat_%06d", count); //save_image(out, buff); -#ifndef _WIN32 - printf("\033[2J"); - printf("\033[1;1H"); -#endif - printf("\nFPS:%.0f\n",fps); + printf("\033[H\033[J"); + printf("\nFPS:%.0f\n", fps); for(i = 0; i < top; ++i){ int index = indexes[i]; @@ -1208,8 +1205,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde float *predictions = network_predict(net, in_s.data); top_predictions(net, top, indexes); - printf("\033[2J"); - printf("\033[1;1H"); + printf("\033[H\033[J"); int threat = 0; for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ @@ -1308,11 +1304,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1); top_predictions(net, top, indexes); -#ifndef _WIN32 - printf("\033[2J"); - printf("\033[1;1H"); -#endif - + printf("\033[H\033[J"); if (!benchmark) { printf("\rFPS: %.2f (use -benchmark command line flag for correct measurement)\n", fps); diff --git a/src/Detector/darknet/src/coco.c b/src/Detector/darknet/src/coco.c index 8c386a224..8ad13834b 100644 --- a/src/Detector/darknet/src/coco.c +++ b/src/Detector/darknet/src/coco.c @@ -291,7 +291,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile) if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh); char labelpath[4096]; - replace_image_to_label(path, labelpath); + replace_image_to_label(path, labelpath); int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); @@ -320,6 +320,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile) //if (fps) free(fps); if (id) free(id); + free(truth); free_image(orig); free_image(sized); } @@ -373,10 +374,9 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) show_image(im, "predictions"); free_image(im); free_image(sized); - + free_alphabet(alphabet); wait_until_press_key_cv(); destroy_all_windows_cv(); - if (filename) break; } free(boxes); @@ -388,16 +388,17 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) void run_coco(int argc, char **argv) { - int dont_show = find_arg(argc, argv, "-dont_show"); - int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); + int dont_show = find_arg(argc, argv, "-dont_show"); + int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); int json_port = find_int_arg(argc, argv, "-json_port", -1); - char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); + char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); char *prefix = find_char_arg(argc, argv, "-prefix", 0); float thresh = find_float_arg(argc, argv, "-thresh", .2); - float hier_thresh = find_float_arg(argc, argv, "-hier", .5); + float hier_thresh = find_float_arg(argc, argv, "-hier", .5); int cam_index = find_int_arg(argc, argv, "-c", 0); int frame_skip = find_int_arg(argc, argv, "-s", 0); - int ext_output = find_arg(argc, argv, "-ext_output"); + int ext_output = find_arg(argc, argv, "-ext_output"); + char *json_file_output = find_char_arg(argc, argv, "-json_file_output", 0); if(argc < 4){ fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); @@ -412,5 +413,5 @@ void run_coco(int argc, char **argv) else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, hier_thresh, cam_index, filename, coco_classes, 80, 1, frame_skip, - prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0); + prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0, json_file_output); } diff --git a/src/Detector/darknet/src/col2im_kernels.cu b/src/Detector/darknet/src/col2im_kernels.cu index 0e07bc37a..ae651c49b 100644 --- a/src/Detector/darknet/src/col2im_kernels.cu +++ b/src/Detector/darknet/src/col2im_kernels.cu @@ -126,11 +126,11 @@ void col2im_gpu_ext(const float* data_col, const int channels, // To avoid involving atomic operations, we will launch one kernel per // bottom dimension, and then in the kernel add up the top dimensions. // NOLINT_NEXT_LINE(whitespace/operators) - col2im_gpu_kernel_ext<< > >( + col2im_gpu_kernel_ext<<>>( num_kernels, data_col, height, width, channels, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col, width_col, data_im); CHECK_CUDA(cudaPeekAtLastError()); -} \ No newline at end of file +} diff --git a/src/Detector/darknet/src/compare.c b/src/Detector/darknet/src/compare.c index 62edabe96..4bacda4c2 100644 --- a/src/Detector/darknet/src/compare.c +++ b/src/Detector/darknet/src/compare.c @@ -1,5 +1,3 @@ -#include - #include "network.h" #include "detection_layer.h" #include "cost_layer.h" @@ -7,6 +5,8 @@ #include "parser.h" #include "box.h" +#include + void train_compare(char *cfgfile, char *weightfile) { srand(time(0)); @@ -54,7 +54,7 @@ void train_compare(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%.3f: %f, %f avg, %lf seconds, %" PRIu64 " images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100 == 0){ char buff[256]; diff --git a/src/Detector/darknet/src/connected_layer.c b/src/Detector/darknet/src/connected_layer.c index 25a5ffa1e..244e82fbb 100644 --- a/src/Detector/darknet/src/connected_layer.c +++ b/src/Detector/darknet/src/connected_layer.c @@ -363,12 +363,12 @@ void forward_connected_layer_gpu(connected_layer l, network_state state) gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n); #endif // CUDNN - if (l.batch_normalize) { - forward_batchnorm_layer_gpu(l, state); - } - else { - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); - } + if (l.batch_normalize) { + forward_batchnorm_layer_gpu(l, state); + } + else { + add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); + } //for(i = 0; i < l.batch; ++i) axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); } diff --git a/src/Detector/darknet/src/conv_lstm_layer.c b/src/Detector/darknet/src/conv_lstm_layer.c index 72e7eac38..a7804e736 100644 --- a/src/Detector/darknet/src/conv_lstm_layer.c +++ b/src/Detector/darknet/src/conv_lstm_layer.c @@ -1454,7 +1454,7 @@ void backward_conv_lstm_layer_gpu(layer l, network_state state) axpy_ongpu(l.outputs*l.batch, 1, l.bottelneck_delta_gpu + l.outputs*l.batch, 1, state.delta, 1); // lead to nan } else { - axpy_ongpu(l.outputs*l.batch, l.time_normalizer, l.temp3_gpu, 1, l.dh_gpu, 1); + // if (l.dh_gpu) axpy_ongpu(l.outputs*l.batch, l.time_normalizer, l.temp3_gpu, 1, l.dh_gpu, 1); } // c diff --git a/src/Detector/darknet/src/convolutional_kernels.cu b/src/Detector/darknet/src/convolutional_kernels.cu index 188399bb7..debd6bf15 100644 --- a/src/Detector/darknet/src/convolutional_kernels.cu +++ b/src/Detector/darknet/src/convolutional_kernels.cu @@ -65,7 +65,7 @@ __global__ void binarize_weights_kernel(float *weights, int n, int size, float * void binarize_weights_gpu(float *weights, int n, int size, float *binary) { - binarize_weights_kernel << > >(weights, n, size, binary); + binarize_weights_kernel <<>>(weights, n, size, binary); CHECK_CUDA(cudaPeekAtLastError()); } @@ -113,9 +113,9 @@ void fast_binarize_weights_gpu(float *weights, int n, int size, float *binary, f size_t gridsize = n * size; const int num_blocks = get_number_of_blocks(gridsize, BLOCK);// gridsize / BLOCK + 1; - set_zero_kernel << <(n/BLOCK + 1), BLOCK, 0, get_cuda_stream() >> > (mean_arr_gpu, n); - reduce_kernel << > > (weights, n, size, mean_arr_gpu); - binarize_weights_mean_kernel << > > (weights, n, size, binary, mean_arr_gpu); + set_zero_kernel <<<(n/BLOCK + 1), BLOCK, 0, get_cuda_stream() >>> (mean_arr_gpu, n); + reduce_kernel <<>> (weights, n, size, mean_arr_gpu); + binarize_weights_mean_kernel <<>> (weights, n, size, binary, mean_arr_gpu); CHECK_CUDA(cudaPeekAtLastError()); } else { @@ -351,7 +351,6 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) im2col_align_ongpu(state.input + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, l.align_workspace_gpu, l.bit_align); //cudaDeviceSynchronize(); //stop_timer_and_show_name("im2col_align_ongpu"); - //getchar(); // should be optimized //start_timer(); @@ -383,7 +382,6 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) //} //cudaDeviceSynchronize(); //check_error(status); - //getchar(); } @@ -533,11 +531,11 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) /* int input_nan_inf = is_nan_or_inf(state.input, l.inputs * l.batch); printf("\n is_nan_or_inf(state.input) = %d \n", input_nan_inf); - if (input_nan_inf) getchar(); + if (input_nan_inf) error(); int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.nweights); printf("\n is_nan_or_inf(l.weights_gpu) = %d \n", weights_nan_inf); - if (weights_nan_inf) getchar(); + if (weights_nan_inf) error(); */ CHECK_CUDNN(cudnnConvolutionForward(cudnn_handle(), @@ -997,7 +995,7 @@ void calc_avg_activation_gpu(float *src, float *dst, int size, int channels, int { const int num_blocks = get_number_of_blocks(size*batches, BLOCK); - calc_avg_activation_kernel << > > (src, dst, size, channels, batches); + calc_avg_activation_kernel <<>> (src, dst, size, channels, batches); } @@ -1021,7 +1019,7 @@ void assisted_activation_gpu(float alpha, float *output, float *gt_gpu, float *a { const int num_blocks = get_number_of_blocks(size*batches, BLOCK); - assisted_activation_kernel << > > (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); + assisted_activation_kernel <<>> (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); } @@ -1045,7 +1043,7 @@ void assisted_activation2_gpu(float alpha, float *output, float *gt_gpu, float * { const int num_blocks = get_number_of_blocks(size*batches, BLOCK); - assisted_activation2_kernel << > > (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); + assisted_activation2_kernel <<>> (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); } void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) diff --git a/src/Detector/darknet/src/convolutional_layer.c b/src/Detector/darknet/src/convolutional_layer.c index 88bb260ab..18f9e8b7f 100644 --- a/src/Detector/darknet/src/convolutional_layer.c +++ b/src/Detector/darknet/src/convolutional_layer.c @@ -330,9 +330,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_ } if (!found_conv_algorithm) { - printf(" Error: cuDNN isn't found FWD algo for convolution.\n"); - getchar(); - exit(0); + error("Error: cuDNN hasn't found FWD algo for convolution", DARKNET_LOC); } //printf(" cuDNN FWD algo: %d, time = %f ms \n", l->fw_algo, min_time); @@ -367,9 +365,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_ } if (!found_conv_algorithm) { - printf(" Error: cuDNN isn't found BWD-data algo for convolution.\n"); - getchar(); - exit(0); + error("Error: cuDNN hasn't found BWD-data algo for convolution", DARKNET_LOC); } //printf(" cuDNN BWD-data algo: %d \n", l->bd_algo); @@ -404,9 +400,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_ } if (!found_conv_algorithm) { - printf(" Error: cuDNN isn't found BWD-filter algo for convolution.\n"); - getchar(); - exit(0); + error("Error: cuDNN hasn't found BWD-filter algo for convolution", DARKNET_LOC); } //printf(" cuDNN BWD-filter algo: %d \n", l->bf_algo); @@ -550,8 +544,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, if (l.share_layer) { if (l.size != l.share_layer->size || l.nweights != l.share_layer->nweights || l.c != l.share_layer->c || l.n != l.share_layer->n) { - printf(" Layer size, nweights, channels or filters don't match for the share_layer"); - getchar(); + error("Layer size, nweights, channels or filters don't match for the share_layer", DARKNET_LOC); } l.weights = l.share_layer->weights; @@ -1688,4 +1681,3 @@ image *visualize_convolutional_layer(convolutional_layer l, char *window, image free_image(dc); return single_weights; } - diff --git a/src/Detector/darknet/src/cost_layer.c b/src/Detector/darknet/src/cost_layer.c index ed1cc1344..60080b120 100644 --- a/src/Detector/darknet/src/cost_layer.c +++ b/src/Detector/darknet/src/cost_layer.c @@ -25,8 +25,8 @@ char *get_cost_string(COST_TYPE a) return "masked"; case SMOOTH: return "smooth"; - default: - return "sse"; + default: + return "sse"; } } diff --git a/src/Detector/darknet/src/dark_cuda.c b/src/Detector/darknet/src/dark_cuda.c index d155b4082..74f067724 100644 --- a/src/Detector/darknet/src/dark_cuda.c +++ b/src/Detector/darknet/src/dark_cuda.c @@ -54,7 +54,7 @@ void *cuda_get_context() return (void *)pctx; } -void check_error(cudaError_t status) +void check_error(cudaError_t status, const char * const filename, const char * const funcname, const int line) { cudaError_t status2 = cudaGetLastError(); if (status != cudaSuccess) @@ -63,10 +63,7 @@ void check_error(cudaError_t status) char buffer[256]; printf("\n CUDA Error: %s\n", s); snprintf(buffer, 256, "CUDA Error: %s", s); -#ifdef WIN32 - getchar(); -#endif - error(buffer, DARKNET_LOC); + error(buffer, filename, funcname, line); } if (status2 != cudaSuccess) { @@ -74,18 +71,15 @@ void check_error(cudaError_t status) char buffer[256]; printf("\n CUDA Error Prev: %s\n", s); snprintf(buffer, 256, "CUDA Error Prev: %s", s); -#ifdef WIN32 - getchar(); -#endif - error(buffer, DARKNET_LOC); + error(buffer, filename, funcname, line); } } -void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time) +void check_error_extended(cudaError_t status, const char * const filename, const char * const funcname, const int line) { if (status != cudaSuccess) { - printf("CUDA status Error: file: %s() : line: %d : build time: %s \n", file, line, date_time); - check_error(status); + printf("CUDA status Error: file: %s: func: %s() line: %d\n", filename, funcname, line); + check_error(status, filename, funcname, line); } #if defined(DEBUG) || defined(CUDA_DEBUG) cuda_debug_sync = 1; @@ -93,9 +87,9 @@ void check_error_extended(cudaError_t status, const char *file, int line, const if (cuda_debug_sync) { status = cudaDeviceSynchronize(); if (status != cudaSuccess) - printf("CUDA status = cudaDeviceSynchronize() Error: file: %s() : line: %d : build time: %s \n", file, line, date_time); + printf("CUDA status = cudaDeviceSynchronize() Error: file: %s: func: %s() line: %d\n", filename, funcname, line); } - check_error(status); + check_error(status, filename, funcname, line); } dim3 cuda_gridsize(size_t n){ @@ -180,7 +174,7 @@ cudnnHandle_t cudnn_handle() } -void cudnn_check_error(cudnnStatus_t status) +void cudnn_check_error(cudnnStatus_t status, const char * const filename, const char * const function, const int line) { #if defined(DEBUG) || defined(CUDA_DEBUG) cudaDeviceSynchronize(); @@ -198,10 +192,7 @@ void cudnn_check_error(cudnnStatus_t status) char buffer[256]; printf("\n cuDNN Error: %s\n", s); snprintf(buffer, 256, "cuDNN Error: %s", s); -#ifdef WIN32 - getchar(); -#endif - error(buffer, DARKNET_LOC); + error(buffer, filename, function, line); } if (status2 != CUDNN_STATUS_SUCCESS) { @@ -209,18 +200,15 @@ void cudnn_check_error(cudnnStatus_t status) char buffer[256]; printf("\n cuDNN Error Prev: %s\n", s); snprintf(buffer, 256, "cuDNN Error Prev: %s", s); -#ifdef WIN32 - getchar(); -#endif - error(buffer, DARKNET_LOC); + error(buffer, filename, function, line); } } -void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time) +void cudnn_check_error_extended(cudnnStatus_t status, const char * const filename, const char * const function, const int line) { if (status != CUDNN_STATUS_SUCCESS) { - printf("\n cuDNN status Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); - cudnn_check_error(status); + printf("\n cuDNN status Error in: file: %s function: %s() line: %d\n", filename, function, line); + cudnn_check_error(status, filename, function, line); } #if defined(DEBUG) || defined(CUDA_DEBUG) cuda_debug_sync = 1; @@ -228,9 +216,9 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line if (cuda_debug_sync) { cudaError_t status = cudaDeviceSynchronize(); if (status != CUDNN_STATUS_SUCCESS) - printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); + printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s function: %s() line: %d\n", filename, function, line); } - cudnn_check_error(status); + cudnn_check_error(status, filename, function, line); } static cudnnHandle_t switchCudnnHandle[16]; @@ -251,10 +239,10 @@ void cublas_check_error(cublasStatus_t status) } } -void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time) +void cublas_check_error_extended(cublasStatus_t status, const char * const filename, const char * const function, const int line) { if (status != CUBLAS_STATUS_SUCCESS) { - printf("\n cuBLAS status Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); + printf("\n cuBLAS status Error in: file: %s function: %s() line: %d\n", filename, function, line); } #if defined(DEBUG) || defined(CUDA_DEBUG) cuda_debug_sync = 1; @@ -262,7 +250,7 @@ void cublas_check_error_extended(cublasStatus_t status, const char *file, int li if (cuda_debug_sync) { cudaError_t status = cudaDeviceSynchronize(); if (status != CUDA_SUCCESS) - printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); + printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s function: %s() line: %d\n", filename, function, line); } cublas_check_error(status); } @@ -385,14 +373,14 @@ void free_pinned_memory() void pre_allocate_pinned_memory(const size_t size) { const size_t num_of_blocks = size / pinned_block_size + ((size % pinned_block_size) ? 1 : 0); - printf("pre_allocate... pinned_ptr = %p \n", pinned_ptr); + printf("pre_allocate... pinned_ptr = %p \n", (void *)pinned_ptr); pthread_mutex_lock(&mutex_pinned); if (!pinned_ptr) { pinned_ptr = (float **)calloc(num_of_blocks, sizeof(float *)); if(!pinned_ptr) error("calloc failed in pre_allocate()", DARKNET_LOC); - printf("pre_allocate: size = %Iu MB, num_of_blocks = %Iu, block_size = %Iu MB \n", + printf("pre_allocate: size = %zu MB, num_of_blocks = %zu, block_size = %zu MB \n", size / (1024*1024), num_of_blocks, pinned_block_size / (1024 * 1024)); int k; @@ -402,7 +390,7 @@ void pre_allocate_pinned_memory(const size_t size) CHECK_CUDA(status); if (!pinned_ptr[k]) error("cudaHostAlloc failed", DARKNET_LOC); else { - printf(" Allocated %d pinned block \n", pinned_block_size); + printf(" Allocated %zu pinned block \n", pinned_block_size); } } pinned_num_of_blocks = num_of_blocks; @@ -423,7 +411,7 @@ float *cuda_make_array_pinned_preallocated(float *x, size_t n) { if ((allocation_size + pinned_index) > pinned_block_size) { const float filled = (float)100 * pinned_index / pinned_block_size; - printf("\n Pinned block_id = %d, filled = %f %% \n", pinned_block_id, filled); + printf("\n Pinned block_id = %zu, filled = %f %% \n", pinned_block_id, filled); pinned_block_id++; pinned_index = 0; } @@ -438,13 +426,13 @@ float *cuda_make_array_pinned_preallocated(float *x, size_t n) if(!x_cpu) { if (allocation_size > pinned_block_size / 2) { - printf("Try to allocate new pinned memory, size = %d MB \n", size / (1024 * 1024)); + printf("Try to allocate new pinned memory, size = %zu MB \n", size / (1024 * 1024)); cudaError_t status = cudaHostAlloc((void **)&x_cpu, size, cudaHostRegisterMapped); if (status != cudaSuccess) fprintf(stderr, " Can't allocate CUDA-pinned memory on CPU-RAM (pre-allocated memory is over too) \n"); CHECK_CUDA(status); } else { - printf("Try to allocate new pinned BLOCK, size = %d MB \n", size / (1024 * 1024)); + printf("Try to allocate new pinned BLOCK, size = %zu MB \n", size / (1024 * 1024)); pinned_num_of_blocks++; pinned_block_id = pinned_num_of_blocks - 1; pinned_index = 0; @@ -553,17 +541,17 @@ int *cuda_make_int_array(size_t n) int *cuda_make_int_array_new_api(int *x, size_t n) { - int *x_gpu; - size_t size = sizeof(int)*n; - cudaError_t status = cudaMalloc((void **)&x_gpu, size); + int *x_gpu; + size_t size = sizeof(int)*n; + cudaError_t status = cudaMalloc((void **)&x_gpu, size); CHECK_CUDA(status); - if (x) { - //status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); + if (x) { + //status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); cudaError_t status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, get_cuda_stream()); CHECK_CUDA(status); - } - if (!x_gpu) error("Cuda malloc failed", DARKNET_LOC); - return x_gpu; + } + if (!x_gpu) error("Cuda malloc failed", DARKNET_LOC); + return x_gpu; } void cuda_free(float *x_gpu) @@ -602,7 +590,7 @@ void cuda_pull_array_async(float *x_gpu, float *x, size_t n) { size_t size = sizeof(float)*n; cudaError_t status = cudaMemcpyAsync(x, x_gpu, size, cudaMemcpyDefault, get_cuda_stream()); - check_error(status); + check_error(status, DARKNET_LOC); //cudaStreamSynchronize(get_cuda_stream()); } diff --git a/src/Detector/darknet/src/dark_cuda.h b/src/Detector/darknet/src/dark_cuda.h index 9251e8776..ffe3836b6 100644 --- a/src/Detector/darknet/src/dark_cuda.h +++ b/src/Detector/darknet/src/dark_cuda.h @@ -25,6 +25,18 @@ extern int gpu_index; #include #include #include + +#ifdef CUDA_OPENGL_INTEGRATION +// On Windows, we need to include before +// including OpenGL headers or else we will get various +// compiler errors due to missing macros. +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif // _WIN32 + +#include +#endif // CUDA_OPENGL_INTEGRATION //#include #ifdef CUDNN @@ -54,11 +66,11 @@ extern int gpu_index; #ifdef __cplusplus extern "C" { #endif // __cplusplus - void check_error(cudaError_t status); - void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time); - void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time); -#define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); -#define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); + void check_error(cudaError_t status, const char * const filename, const char * const funcname, const int line); + void check_error_extended(cudaError_t status, const char * const filename, const char * const funcname, const int line); + void cublas_check_error_extended(cublasStatus_t status, const char * const filename, const char * const funcname, const int line); +#define CHECK_CUDA(X) check_error_extended(X, __FILE__, __func__, __LINE__ ); +#define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__, __func__, __LINE__ ); cublasHandle_t blas_handle(); void free_pinned_memory(); @@ -68,7 +80,7 @@ extern "C" { float *cuda_make_array(float *x, size_t n); void **cuda_make_array_pointers(void **x, size_t n); int *cuda_make_int_array(size_t n); - int *cuda_make_int_array_new_api(int *x, size_t n); + int *cuda_make_int_array_new_api(int *x, size_t n); void cuda_push_array(float *x_gpu, float *x, size_t n); //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); //LIB_API void cuda_set_device(int n); @@ -92,8 +104,8 @@ extern "C" { cudnnHandle_t cudnn_handle(); enum {cudnn_fastest, cudnn_smallest, cudnn_specify}; -void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time); -#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); +void cudnn_check_error_extended(cudnnStatus_t status, const char * const filename, const char * const function, const int line); +#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__, __func__, __LINE__); #endif #ifdef __cplusplus diff --git a/src/Detector/darknet/src/darknet.c b/src/Detector/darknet/src/darknet.c index 13ab75f3d..392f2e466 100644 --- a/src/Detector/darknet/src/darknet.c +++ b/src/Detector/darknet/src/darknet.c @@ -117,7 +117,7 @@ void operations(char *cfgfile) ops += 2l * l.n * l.size*l.size*l.c * l.out_h*l.out_w; } else if(l.type == CONNECTED){ ops += 2l * l.inputs * l.outputs; - } else if (l.type == RNN){ + } else if (l.type == RNN){ ops += 2l * l.input_layer->inputs * l.input_layer->outputs; ops += 2l * l.self_layer->inputs * l.self_layer->outputs; ops += 2l * l.output_layer->inputs * l.output_layer->outputs; @@ -251,7 +251,7 @@ void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile) denormalize_connected_layer(*l.ui); denormalize_connected_layer(*l.ug); denormalize_connected_layer(*l.uo); - } + } } save_weights(net, outfile); } @@ -412,7 +412,7 @@ void denormalize_net(char *cfgfile, char *weightfile, char *outfile) l.ug->batch_normalize = 0; l.uo->batch_normalize = 0; net.layers[i].batch_normalize=0; - } + } } save_weights(net, outfile); } @@ -432,7 +432,7 @@ void visualize(char *cfgfile, char *weightfile) int main(int argc, char **argv) { #ifdef _DEBUG - _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); printf(" _DEBUG is used \n"); #endif @@ -440,11 +440,11 @@ int main(int argc, char **argv) printf(" DEBUG=1 \n"); #endif - int i; - for (i = 0; i < argc; ++i) { - if (!argv[i]) continue; - strip_args(argv[i]); - } + int i; + for (i = 0; i < argc; ++i) { + if (!argv[i]) continue; + strip_args(argv[i]); + } //test_resize("data/bad.jpg"); //test_box(); @@ -454,11 +454,6 @@ int main(int argc, char **argv) return 0; } gpu_index = find_int_arg(argc, argv, "-i", 0); - if(find_arg(argc, argv, "-nogpu")) { - gpu_index = -1; - printf("\n Currently Darknet doesn't support -nogpu flag. If you want to use CPU - please compile Darknet with GPU=0 in the Makefile, or compile darknet_no_gpu.sln on Windows.\n"); - exit(-1); - } #ifndef GPU gpu_index = -1; @@ -493,7 +488,7 @@ int main(int argc, char **argv) run_detector(argc, argv); } else if (0 == strcmp(argv[1], "detect")){ float thresh = find_float_arg(argc, argv, "-thresh", .24); - int ext_output = find_arg(argc, argv, "-ext_output"); + int ext_output = find_arg(argc, argv, "-ext_output"); char *filename = (argc > 4) ? argv[4]: 0; test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL, 0, 0); } else if (0 == strcmp(argv[1], "cifar")){ diff --git a/src/Detector/darknet/src/data.c b/src/Detector/darknet/src/data.c index 7d2a46599..70e1b09b2 100644 --- a/src/Detector/darknet/src/data.c +++ b/src/Detector/darknet/src/data.c @@ -9,8 +9,6 @@ #include #include -extern int check_mistakes; - #define NUMCHARS 37 pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; @@ -206,10 +204,6 @@ box_label *read_boxes(char *filename, int *n) char *new_line = "\n"; fwrite(new_line, sizeof(char), strlen(new_line), fw); fclose(fw); - if (check_mistakes) { - printf("\n Error in read_boxes() \n"); - getchar(); - } *n = 0; return boxes; @@ -409,7 +403,6 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d], file: %s \n", id, (classes-1), labelpath); sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, (classes-1)); system(buff); - if (check_mistakes) getchar(); ++sub; continue; } @@ -424,7 +417,6 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath); system(buff); ++sub; - if (check_mistakes) getchar(); continue; } if (x <= 0 || x > 1 || y <= 0 || y > 1) { @@ -432,7 +424,6 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y); system(buff); ++sub; - if (check_mistakes) getchar(); continue; } if (w > 1) { @@ -440,14 +431,12 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w); system(buff); w = 1; - if (check_mistakes) getchar(); } if (h > 1) { printf("\n Wrong annotation: h = %f, file: %s \n", h, labelpath); sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h); system(buff); h = 1; - if (check_mistakes) getchar(); } if (x == 0) x += lowest_w; if (y == 0) y += lowest_h; @@ -1057,14 +1046,11 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo if (use_mixup == 2 || use_mixup == 4) { printf("\n cutmix=1 - isn't supported for Detector (use cutmix=1 only for Classifier) \n"); - if (check_mistakes) getchar(); if(use_mixup == 2) use_mixup = 0; else use_mixup = 3; } if (use_mixup == 3 && letter_box) { - //printf("\n Combination: letter_box=1 & mosaic=1 - isn't supported, use only 1 of these parameters \n"); - //if (check_mistakes) getchar(); - //exit(0); + error("Combination: letter_box=1 & mosaic=1 - isn't supported, use only 1 of these parameters", DARKNET_LOC); } if (random_gen() % 2 == 0) use_mixup = 0; int i; @@ -1111,9 +1097,6 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo if (src == NULL) { printf("\n Error in load_data_detection() - OpenCV \n"); fflush(stdout); - if (check_mistakes) { - getchar(); - } continue; } @@ -1238,6 +1221,15 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo int swidth = ow - pleft - pright; int sheight = oh - ptop - pbot; + if (swidth <= 0 || sheight <= 0 || (ow - pleft) <= 0 || (oh - ptop) <= 0 ) { + printf("\n WARNING: invalid resize. Resetting swidth: %d , sheight: %d, pleft: %d, ptop: %d \n", dw, dh, 0 ,0); + printf("\n Original values: \n swidth = %d, sheight = %d, pleft = %d, pright = %d, ptop = %d, pbot = %d, ow = %d, oh = %d \n", swidth, sheight, pleft, pright, ptop, pbot, ow, oh); + swidth = ow; + sheight = oh; + pleft = 0; + ptop = 0; + } + float sx = (float)swidth / ow; float sy = (float)sheight / oh; @@ -1384,12 +1376,10 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo //assert(use_mixup < 2); if (use_mixup == 2) { - printf("\n cutmix=1 - isn't supported for Detector \n"); - exit(0); + error("cutmix=1 - isn't supported for Detector", DARKNET_LOC); } if (use_mixup == 3 || use_mixup == 4) { - printf("\n mosaic=1 - compile Darknet with OpenCV for using mosaic=1 \n"); - exit(0); + error("mosaic=1 - compile Darknet with OpenCV for using mosaic=1", DARKNET_LOC); } int mixup = use_mixup ? random_gen() % 2 : 0; //printf("\n mixup = %d \n", mixup); @@ -1558,8 +1548,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo show_image(sized, buff); wait_until_press_key_cv(); } - printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Press Enter: \n"); - //getchar(); + printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images\n"); } free_image(orig); diff --git a/src/Detector/darknet/src/data.h b/src/Detector/darknet/src/data.h index ecbd0188e..9f12343a0 100644 --- a/src/Detector/darknet/src/data.h +++ b/src/Detector/darknet/src/data.h @@ -45,8 +45,8 @@ typedef struct load_args{ char **labels; int h; int w; - int c; // color depth - int out_w; + int c; // color depth + int out_w; int out_h; int nh; int nw; @@ -55,7 +55,7 @@ typedef struct load_args{ int classes; int background; int scale; - int small_object; + int small_object; float jitter; int flip; float angle; diff --git a/src/Detector/darknet/src/demo.c b/src/Detector/darknet/src/demo.c index 0c4033351..5a01faf36 100644 --- a/src/Detector/darknet/src/demo.c +++ b/src/Detector/darknet/src/demo.c @@ -73,7 +73,6 @@ void *fetch_in_thread(void *ptr) printf("Stream closed.\n"); custom_atomic_store_int(&flag_exit, 1); custom_atomic_store_int(&run_fetch_in_thread, 0); - //exit(EXIT_FAILURE); return 0; } //in_s = resize_image(in, net.w, net.h); @@ -142,7 +141,7 @@ double get_wall_time() void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, - int benchmark, int benchmark_layers) + int benchmark, int benchmark_layers, char *json_file_output) { if (avgframes < 1) avgframes = 1; avg_frames = avgframes; @@ -157,6 +156,15 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int demo_thresh = thresh; demo_ext_output = ext_output; demo_json_port = json_port; + char *json_buf = NULL; + FILE* json_file = NULL; + + if (json_file_output) { + json_file = fopen(json_file_output, "wb"); + char *tmp = "[\n"; + fwrite(tmp, sizeof(char), strlen(tmp), json_file); + } + printf("Demo\n"); net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 if(weightfile){ @@ -201,8 +209,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if (l.classes != demo_classes) { printf("\n Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes); - getchar(); - exit(0); + error("Error!", DARKNET_LOC); } flag_exit = 0; @@ -280,8 +287,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if (l.embedding_size) set_track_id(local_dets, local_nboxes, demo_thresh, l.sim_thresh, l.track_ciou_norm, l.track_history_size, l.dets_for_track, l.dets_for_show); - //printf("\033[2J"); - //printf("\033[1;1H"); + printf("\033[H\033[J"); //printf("\nFPS:%.1f\n", fps); printf("Objects:\n\n"); @@ -291,6 +297,16 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int send_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, demo_json_port, timeout); } + if (json_file_output) { + if (json_buf) { + char *tmp = ", \n"; + fwrite(tmp, sizeof(char), strlen(tmp), json_file); + } + json_buf = detection_to_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, NULL); + fwrite(json_buf, sizeof(char), strlen(json_buf), json_file); + free(json_buf); + } + //char *http_post_server = "webhook.site/898bbd9b-0ddd-49cf-b81d-1f56be98d870"; if (http_post_host && !send_http_post_once) { int timeout = 3; // 3 seconds @@ -344,12 +360,12 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int } while (custom_atomic_load_int(&run_detect_in_thread)) { - if(avg_fps > 180) this_thread_yield(); + if(avg_fps > 50) this_thread_yield(); else this_thread_sleep_for(thread_wait_ms); // custom_join(detect_thread, 0); } if (!benchmark) { while (custom_atomic_load_int(&run_fetch_in_thread)) { - if (avg_fps > 180) this_thread_yield(); + if (avg_fps > 50) this_thread_yield(); else this_thread_sleep_for(thread_wait_ms); // custom_join(fetch_thread, 0); } free_image(det_s); @@ -397,6 +413,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int printf("output_video_writer closed. \n"); } + if (json_file_output) { + char *tmp = "\n]"; + fwrite(tmp, sizeof(char), strlen(tmp), json_file); + fclose(json_file); + } this_thread_sleep_for(thread_wait_ms); custom_join(detect_thread, 0); @@ -414,21 +435,14 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int free_ptrs((void **)names, net.layers[net.n - 1].classes); - const int nsize = 8; - for (j = 0; j < nsize; ++j) { - for (i = 32; i < 127; ++i) { - free_image(alphabet[j][i]); - } - free(alphabet[j]); - } - free(alphabet); + free_alphabet(alphabet); free_network(net); //cudaProfilerStop(); } #else void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, - int benchmark, int benchmark_layers) + int benchmark, int benchmark_layers, char *json_file_output) { fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); } diff --git a/src/Detector/darknet/src/demo.h b/src/Detector/darknet/src/demo.h index 380b72fed..15f359dd0 100644 --- a/src/Detector/darknet/src/demo.h +++ b/src/Detector/darknet/src/demo.h @@ -6,7 +6,7 @@ extern "C" { #endif void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, - int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, int benchmark, int benchmark_layers); + int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, int benchmark, int benchmark_layers, char *json_file_output); #ifdef __cplusplus } #endif diff --git a/src/Detector/darknet/src/detection_layer.c b/src/Detector/darknet/src/detection_layer.c index 3c6528a9b..d3b9af8a4 100644 --- a/src/Detector/darknet/src/detection_layer.c +++ b/src/Detector/darknet/src/detection_layer.c @@ -287,29 +287,29 @@ void backward_detection_layer_gpu(detection_layer l, network_state state) void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) { - int i, j, n; - float *predictions = l.output; - //int per_cell = 5*num+classes; - for (i = 0; i < l.side*l.side; ++i) { - int row = i / l.side; - int col = i % l.side; - for (n = 0; n < l.n; ++n) { - int index = i*l.n + n; - int p_index = l.side*l.side*l.classes + i*l.n + n; - float scale = predictions[p_index]; - int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n) * 4; - box b; - b.x = (predictions[box_index + 0] + col) / l.side * w; - b.y = (predictions[box_index + 1] + row) / l.side * h; - b.w = pow(predictions[box_index + 2], (l.sqrt ? 2 : 1)) * w; - b.h = pow(predictions[box_index + 3], (l.sqrt ? 2 : 1)) * h; - dets[index].bbox = b; - dets[index].objectness = scale; - for (j = 0; j < l.classes; ++j) { - int class_index = i*l.classes; - float prob = scale*predictions[class_index + j]; - dets[index].prob[j] = (prob > thresh) ? prob : 0; - } - } - } + int i, j, n; + float *predictions = l.output; + //int per_cell = 5*num+classes; + for (i = 0; i < l.side*l.side; ++i) { + int row = i / l.side; + int col = i % l.side; + for (n = 0; n < l.n; ++n) { + int index = i*l.n + n; + int p_index = l.side*l.side*l.classes + i*l.n + n; + float scale = predictions[p_index]; + int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n) * 4; + box b; + b.x = (predictions[box_index + 0] + col) / l.side * w; + b.y = (predictions[box_index + 1] + row) / l.side * h; + b.w = pow(predictions[box_index + 2], (l.sqrt ? 2 : 1)) * w; + b.h = pow(predictions[box_index + 3], (l.sqrt ? 2 : 1)) * h; + dets[index].bbox = b; + dets[index].objectness = scale; + for (j = 0; j < l.classes; ++j) { + int class_index = i*l.classes; + float prob = scale*predictions[class_index + j]; + dets[index].prob[j] = (prob > thresh) ? prob : 0; + } + } + } } diff --git a/src/Detector/darknet/src/detector.c b/src/Detector/darknet/src/detector.c index 73b7bedc1..0fc361429 100644 --- a/src/Detector/darknet/src/detector.c +++ b/src/Detector/darknet/src/detector.c @@ -19,24 +19,22 @@ typedef __compar_fn_t comparison_fn_t; #include "http_stream.h" -int check_mistakes = 0; - static int coco_ids[] = { 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90 }; -void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, float thresh, float iou_thresh, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path) +void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, float thresh, float iou_thresh, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path, int mAP_epochs) { list *options = read_data_cfg(datacfg); char *train_images = option_find_str(options, "train", "data/train.txt"); char *valid_images = option_find_str(options, "valid", train_images); char *backup_directory = option_find_str(options, "backup", "/backup/"); + network net_map; if (calc_map) { FILE* valid_file = fopen(valid_images, "r"); if (!valid_file) { printf("\n Error: There is no %s file for mAP calculation!\n Don't use -map flag.\n Or set valid=%s in your %s file. \n", valid_images, train_images, datacfg); - getchar(); - exit(-1); + error("Error!", DARKNET_LOC); } else fclose(valid_file); @@ -55,7 +53,6 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i if (net_classes != names_size) { printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", name_list, names_size, net_classes, cfgfile); - if (net_classes > names_size) getchar(); } free_ptrs((void**)names, net_map.layers[net_map.n - 1].classes); } @@ -91,13 +88,17 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i const int actual_batch_size = net.batch * net.subdivisions; if (actual_batch_size == 1) { - printf("\n Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64 \n"); - getchar(); + error("Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64", DARKNET_LOC); } else if (actual_batch_size < 8) { printf("\n Warning: You set batch=%d lower than 64! It is recommended to set batch=64 subdivision=64 \n", actual_batch_size); } + int save_after_iterations = option_find_int(options, "saveweights", (net.max_batches < 10000) ? 1000 : 10000 ); // configure when to write weights. Very useful for smaller datasets! + int save_last_weights_after = option_find_int(options, "savelast", 100); + printf("Weights are saved after: %d iterations. Last weights (*_last.weight) are stored every %d iterations. \n", save_after_iterations, save_last_weights_after ); + + int imgs = net.batch * net.subdivisions * ngpus; printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); data train, buffer; @@ -298,7 +299,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i const int iteration = get_current_iteration(net); //i = get_current_batch(net); - int calc_map_for_each = 4 * train_images_num / (net.batch * net.subdivisions); // calculate mAP for each 4 Epochs + int calc_map_for_each = mAP_epochs * train_images_num / (net.batch * net.subdivisions); // calculate mAP every mAP_epochs calc_map_for_each = fmax(calc_map_for_each, 100); int next_map_calc = iter_map + calc_map_for_each; next_map_calc = fmax(next_map_calc, net.burn_in); @@ -308,6 +309,14 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i if (mean_average_precision > 0) printf("\n Last accuracy mAP@%0.2f = %2.2f %%, best = %2.2f %% ", iou_thresh, mean_average_precision * 100, best_map * 100); } + printf("\033[H\033[J"); + if (mean_average_precision > 0.0) { + printf("%d/%d: loss=%0.1f map=%0.2f best=%0.2f hours left=%0.1f\007", iteration, net.max_batches, loss, mean_average_precision, best_map, avg_time); + } + else { + printf("%d/%d: loss=%0.1f hours left=%0.1f\007", iteration, net.max_batches, loss, avg_time); + } + if (net.cudnn_half) { if (iteration < net.burn_in * 3) fprintf(stderr, "\n Tensor Cores are disabled until the first %d iterations are reached.\n", 3 * net.burn_in); else fprintf(stderr, "\n Tensor Cores are used.\n"); @@ -355,7 +364,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i iter_map = iteration; mean_average_precision = validate_detector_map(datacfg, cfgfile, weightfile, thresh, iou_thresh, 0, net.letter_box, &net_map);// &net_combined); printf("\n mean_average_precision (mAP@%0.2f) = %f \n", iou_thresh, mean_average_precision); - if (mean_average_precision > best_map) { + if (mean_average_precision >= best_map) { best_map = mean_average_precision; printf("New best mAP!\n"); char buff[256]; @@ -380,10 +389,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, net.max_batches, mean_average_precision, draw_precision, "mAP%", avg_contrastive_acc / 100, dont_show, mjpeg_port, avg_time); #endif // OPENCV - //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { - //if (i % 100 == 0) { - if ((iteration >= (iter_save + 10000) || iteration % 10000 == 0) || - (iteration >= (iter_save + 1000) || iteration % 1000 == 0) && net.max_batches < 10000) + if ( (iteration >= (iter_save + save_after_iterations) || iteration % save_after_iterations == 0) ) { iter_save = iteration; #ifdef GPU @@ -394,7 +400,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i save_weights(net, buff); } - if (iteration >= (iter_save_last + 100) || (iteration % 100 == 0 && iteration > 1)) { + if ( (save_after_iterations > save_last_weights_after) && (iteration >= (iter_save_last + save_last_weights_after) || (iteration % save_last_weights_after == 0 && iteration > 1))) { iter_save_last = iteration; #ifdef GPU if (ngpus != 1) sync_nets(nets, ngpus, 0); @@ -483,7 +489,7 @@ static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_box if (dets[i].prob[j] > 0) { char buff[1024]; sprintf(buff, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); - fprintf(fp, buff); + fprintf(fp, "%s", buff); //printf("%s", buff); } } @@ -902,6 +908,7 @@ void validate_detector_recall(char *datacfg, char *cfgfile, char *weightfile) } //fprintf(stderr, " %s - %s - ", paths[i], labelpath); fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals / (i + 1), avg_iou * 100 / total, 100.*correct / total); + free(truth); free(id); free_image(orig); free_image(sized); @@ -962,7 +969,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa if (net.layers[net.n - 1].classes != names_size) { printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", name_list, names_size, net.layers[net.n - 1].classes, cfgfile); - getchar(); + error("Error!", DARKNET_LOC); } srand(time(0)); printf("\n calculation mAP (mean average precision)...\n"); @@ -970,13 +977,13 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa list *plist = get_paths(valid_images); char **paths = (char **)list_to_array(plist); + list *plist_dif = NULL; char **paths_dif = NULL; if (difficult_valid_images) { - list *plist_dif = get_paths(difficult_valid_images); + plist_dif = get_paths(difficult_valid_images); paths_dif = (char **)list_to_array(plist_dif); } - layer l = net.layers[net.n - 1]; int k; for (k = 0; k < net.n; ++k) { @@ -1181,6 +1188,8 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa //if(errors_in_this_image > 0) fwrite(buff, sizeof(char), strlen(buff), reinforcement_fd); free_detections(dets, nboxes); + free(truth); + free(truth_dif); free(id); free_image(val[t]); free_image(val_resized[t]); @@ -1274,7 +1283,6 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa free(truth_flags); - double mean_average_precision = 0; for (i = 0; i < classes; ++i) { @@ -1362,7 +1370,14 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa free(detections); free(truth_classes_count); free(detection_per_class_count); - + free(paths); + free(paths_dif); + free_list_contents(plist); + free_list(plist); + if (plist_dif) { + free_list_contents(plist_dif); + free_list(plist_dif); + } free(avg_iou_per_class); free(tp_for_thresh_per_class); free(fp_for_thresh_per_class); @@ -1468,7 +1483,6 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int sprintf(buff, "echo \"Wrong label: %s - j = %d, x = %f, y = %f, width = %f, height = %f\" >> bad_label.list", labelpath, j, truth[j].x, truth[j].y, truth[j].w, truth[j].h); system(buff); - if (check_mistakes) getchar(); } if (truth[j].id >= classes) { classes = truth[j].id + 1; @@ -1484,6 +1498,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes); } free(buff); + free(truth); } printf("\n all loaded. \n"); printf("\n calculating k-means++ ..."); @@ -1599,8 +1614,6 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int } free(rel_width_height_array); free(counter_per_class); - - getchar(); } @@ -1624,7 +1637,6 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam if (net.layers[net.n - 1].classes != names_size) { printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", name_list, names_size, net.layers[net.n - 1].classes, cfgfile); - if (net.layers[net.n - 1].classes > names_size) getchar(); } srand(2222222); char buff[256]; @@ -1757,17 +1769,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam free_ptrs((void**)names, net.layers[net.n - 1].classes); free_list_contents_kvp(options); free_list(options); - - int i; - const int nsize = 8; - for (j = 0; j < nsize; ++j) { - for (i = 32; i < 127; ++i) { - free_image(alphabet[j][i]); - } - free(alphabet[j]); - } - free(alphabet); - + free_alphabet(alphabet); free_network(net); } @@ -1795,7 +1797,6 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, if (net.layers[net.n - 1].classes != names_size) { printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", name_list, names_size, net.layers[net.n - 1].classes, cfgfile); - if (net.layers[net.n - 1].classes > names_size) getchar(); } srand(2222222); @@ -1939,8 +1940,7 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, int dont_show, int it_num, int letter_box, int benchmark_layers) { - printf(" ./darknet detector draw ... can't be used without OpenCV and CUDA! \n"); - getchar(); + error("darknet detector draw ... can't be used without OpenCV and CUDA", DARKNET_LOC); } #endif // defined(OPENCV) && defined(GPU) @@ -1955,7 +1955,6 @@ void run_detector(int argc, char **argv) int letter_box = find_arg(argc, argv, "-letter_box"); int calc_map = find_arg(argc, argv, "-map"); int map_points = find_int_arg(argc, argv, "-points", 0); - check_mistakes = find_arg(argc, argv, "-check_mistakes"); int show_imgs = find_arg(argc, argv, "-show_imgs"); int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); int avgframes = find_int_arg(argc, argv, "-avgframes", 3); @@ -1964,6 +1963,7 @@ void run_detector(int argc, char **argv) char *http_post_host = find_char_arg(argc, argv, "-http_post_host", 0); int time_limit_sec = find_int_arg(argc, argv, "-time_limit_sec", 0); char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); + char *json_file_output = find_char_arg(argc, argv, "-json_file_output", 0); char *outfile = find_char_arg(argc, argv, "-out", 0); char *prefix = find_char_arg(argc, argv, "-prefix", 0); float thresh = find_float_arg(argc, argv, "-thresh", .25); // 0.24 @@ -1979,6 +1979,8 @@ void run_detector(int argc, char **argv) int ext_output = find_arg(argc, argv, "-ext_output"); int save_labels = find_arg(argc, argv, "-save_labels"); char* chart_path = find_char_arg(argc, argv, "-chart", 0); + // While training, decide after how many epochs mAP will be calculated. Default value is 4 which means the mAP will be calculated after each 4 epochs + int mAP_epochs = find_int_arg(argc, argv, "-mAP_epochs", 4); if (argc < 4) { fprintf(stderr, "usage: %s %s [train/test/valid/demo/map] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; @@ -2017,7 +2019,7 @@ void run_detector(int argc, char **argv) if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0; char *filename = (argc > 6) ? argv[6] : 0; if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile, letter_box, benchmark_layers); - else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, thresh, iou_thresh, mjpeg_port, show_imgs, benchmark_layers, chart_path); + else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, thresh, iou_thresh, mjpeg_port, show_imgs, benchmark_layers, chart_path, mAP_epochs); else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights); else if (0 == strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh, iou_thresh, map_points, letter_box, NULL); @@ -2035,7 +2037,7 @@ void run_detector(int argc, char **argv) if (strlen(filename) > 0) if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0; demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, - mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers); + mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers, json_file_output); free_list_contents_kvp(options); free_list(options); diff --git a/src/Detector/darknet/src/dice.c b/src/Detector/darknet/src/dice.c index 8a0393a8b..bb5d64379 100644 --- a/src/Detector/darknet/src/dice.c +++ b/src/Detector/darknet/src/dice.c @@ -33,7 +33,7 @@ void train_dice(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%d: %f, %f avg, %lf seconds, %" PRIu64 " images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if((i % 100) == 0) net.learning_rate *= .1; if(i%100==0){ diff --git a/src/Detector/darknet/src/dropout_layer.c b/src/Detector/darknet/src/dropout_layer.c index 3cc73ed24..744c708a4 100644 --- a/src/Detector/darknet/src/dropout_layer.c +++ b/src/Detector/darknet/src/dropout_layer.c @@ -19,7 +19,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability, int d if (l.w <= 0 || l.h <= 0 || l.c <= 0) { printf(" Error: DropBlock - there must be positive values for: l.w=%d, l.h=%d, l.c=%d \n", l.w, l.h, l.c); - exit(0); + error("Error!", DARKNET_LOC); } } l.inputs = inputs; diff --git a/src/Detector/darknet/src/dropout_layer_kernels.cu b/src/Detector/darknet/src/dropout_layer_kernels.cu index 05cde5949..3ed7a186b 100644 --- a/src/Detector/darknet/src/dropout_layer_kernels.cu +++ b/src/Detector/darknet/src/dropout_layer_kernels.cu @@ -165,11 +165,11 @@ void forward_dropout_layer_gpu(dropout_layer l, network_state state) //fill_ongpu(l.outputs * l.batch, 1, state.input, 1); // remove!!! int num_blocks = l.batch * l.c; - dropblock_fast_kernel << > > (l.rand_gpu, block_prob, l.w, l.h, l.w*l.h, l.c, l.batch, block_size, l.drop_blocks_scale_gpu, state.input); + dropblock_fast_kernel <<>> (l.rand_gpu, block_prob, l.w, l.h, l.w*l.h, l.c, l.batch, block_size, l.drop_blocks_scale_gpu, state.input); CHECK_CUDA(cudaPeekAtLastError()); num_blocks = get_number_of_blocks(l.batch, BLOCK); - set_scales_dropblock_kernel << > > (l.drop_blocks_scale_gpu, block_size, block_size, l.outputs, l.batch); + set_scales_dropblock_kernel <<>> (l.drop_blocks_scale_gpu, block_size, block_size, l.outputs, l.batch); CHECK_CUDA(cudaPeekAtLastError()); /* @@ -205,7 +205,7 @@ void forward_dropout_layer_gpu(dropout_layer l, network_state state) */ num_blocks = get_number_of_blocks(l.outputs * l.batch, BLOCK); - scale_dropblock_kernel << > > (state.input, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu); + scale_dropblock_kernel <<>> (state.input, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -221,7 +221,7 @@ void forward_dropout_layer_gpu(dropout_layer l, network_state state) cuda_push_array(layer.rand_gpu, layer.rand, size); */ - yoloswag420blazeit360noscope << > > (state.input, size, l.rand_gpu, l.probability, l.scale); + yoloswag420blazeit360noscope <<>> (state.input, size, l.rand_gpu, l.probability, l.scale); CHECK_CUDA(cudaPeekAtLastError()); } } @@ -264,10 +264,10 @@ void backward_dropout_layer_gpu(dropout_layer l, network_state state) //fill_ongpu(l.outputs * l.batch, 1, state.delta, 1); // remove!!! int num_blocks = get_number_of_blocks(l.outputs * l.batch, BLOCK); - backward_dropblock_kernel << > >(l.rand_gpu, state.delta, l.outputs * l.batch); + backward_dropblock_kernel <<>>(l.rand_gpu, state.delta, l.outputs * l.batch); CHECK_CUDA(cudaPeekAtLastError()); - scale_dropblock_kernel << > > (state.delta, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu); + scale_dropblock_kernel <<>> (state.delta, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu); CHECK_CUDA(cudaPeekAtLastError()); /* @@ -305,7 +305,7 @@ void backward_dropout_layer_gpu(dropout_layer l, network_state state) } // dropout else { - yoloswag420blazeit360noscope << > > (state.delta, size, l.rand_gpu, l.probability, l.scale); + yoloswag420blazeit360noscope <<>> (state.delta, size, l.rand_gpu, l.probability, l.scale); CHECK_CUDA(cudaPeekAtLastError()); } } diff --git a/src/Detector/darknet/src/gaussian_yolo_layer.c b/src/Detector/darknet/src/gaussian_yolo_layer.c index bd99a89dc..f94f4a6ce 100644 --- a/src/Detector/darknet/src/gaussian_yolo_layer.c +++ b/src/Detector/darknet/src/gaussian_yolo_layer.c @@ -20,8 +20,6 @@ #define M_PI 3.141592 #endif -extern int check_mistakes; - layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes) { int i; @@ -51,7 +49,7 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m l.outputs = h*w*n*(classes + 8 + 1); l.inputs = l.outputs; l.max_boxes = max_boxes; - l.truth_size = 4 + 2; + l.truth_size = 4 + 2; l.truths = l.max_boxes*l.truth_size; l.delta = (float*)calloc(batch*l.outputs, sizeof(float)); l.output = (float*)calloc(batch*l.outputs, sizeof(float)); @@ -470,7 +468,6 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if (class_id >= l.classes) { printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); printf(" truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id); - if (check_mistakes) getchar(); continue; // if label contains class_id more than number of classes in the cfg-file } if(!truth.x) break; diff --git a/src/Detector/darknet/src/gemm.c b/src/Detector/darknet/src/gemm.c index 5f5c9689c..256061bbe 100644 --- a/src/Detector/darknet/src/gemm.c +++ b/src/Detector/darknet/src/gemm.c @@ -297,8 +297,6 @@ void gemm_nn_custom_bin_mean(int M, int N, int K, float ALPHA_UNUSED, } free(count_arr); - - //getchar(); } */ diff --git a/src/Detector/darknet/src/getopt.c b/src/Detector/darknet/src/getopt.c index 45d2b8e63..61aa096ab 100644 --- a/src/Detector/darknet/src/getopt.c +++ b/src/Detector/darknet/src/getopt.c @@ -64,8 +64,8 @@ permute_args(int panonopt_start, int panonopt_end, int opt_end, char* swap; /* - * compute lengths of blocks and number and size of cycles - */ + * compute lengths of blocks and number and size of cycles + */ nnonopts = panonopt_end - panonopt_start; nopts = opt_end - panonopt_end; ncycle = gcd(nnonopts, nopts); @@ -91,7 +91,7 @@ permute_args(int panonopt_start, int panonopt_end, int opt_end, #ifdef REPLACE_GETOPT /* * getopt -- - * Parse argc/argv argument vector. + * Parse argc/argv argument vector. * * [eventually this will replace the BSD getopt] */ @@ -99,13 +99,13 @@ int getopt(int nargc, char* const* nargv, const char* options) { /* - * We don't pass FLAG_PERMUTE to getopt_internal() since - * the BSD getopt(3) (unlike GNU) has never done this. - * - * Furthermore, since many privileged programs call getopt() - * before dropping privileges it makes sense to keep things - * as simple (and bug-free) as possible. - */ + * We don't pass FLAG_PERMUTE to getopt_internal() since + * the BSD getopt(3) (unlike GNU) has never done this. + * + * Furthermore, since many privileged programs call getopt() + * before dropping privileges it makes sense to keep things + * as simple (and bug-free) as possible. + */ return (getopt_internal(nargc, nargv, options, NULL, NULL, 0)); } #endif /* REPLACE_GETOPT */ @@ -129,24 +129,24 @@ int getopt(int nargc, char* const* nargv, const char* options) extern "C" { #endif -struct option /* specification for a long form option... */ +struct option /* specification for a long form option... */ { const char* name; /* option name, without leading hyphens */ - int has_arg; /* does it take an argument? */ - int* flag; /* where to save its status, or NULL */ - int val; /* its associated status value */ + int has_arg; /* does it take an argument? */ + int* flag; /* where to save its status, or NULL */ + int val; /* its associated status value */ }; -enum /* permitted values for its `has_arg' field... */ +enum /* permitted values for its `has_arg' field... */ { - no_argument = 0, /* option never takes an argument */ - required_argument, /* option always requires an argument */ - optional_argument /* option may take an argument */ + no_argument = 0, /* option never takes an argument */ + required_argument, /* option always requires an argument */ + optional_argument /* option may take an argument */ }; /* * parse_long_options -- - * Parse long options in argc/argv argument vector. + * Parse long options in argc/argv argument vector. * Returns -1 if short_too is set and the option does not match long_options. */ static int @@ -186,9 +186,9 @@ parse_long_options(char* const* nargv, const char* options, break; } /* - * If this is a known short option, don't allow - * a partial match of a single character. - */ + * If this is a known short option, don't allow + * a partial match of a single character. + */ if (short_too && current_argv_len == 1) continue; @@ -212,8 +212,8 @@ parse_long_options(char* const* nargv, const char* options, warnx(noarg, (int)current_argv_len, current_argv); /* - * XXX: GNU sets optopt to val regardless of flag - */ + * XXX: GNU sets optopt to val regardless of flag + */ if (long_options[match].flag == NULL) optopt = long_options[match].val; else @@ -225,23 +225,23 @@ parse_long_options(char* const* nargv, const char* options, optarg = has_equal; else if (long_options[match].has_arg == required_argument) { /* - * optional argument doesn't use next nargv - */ + * optional argument doesn't use next nargv + */ optarg = nargv[optind++]; } } if ((long_options[match].has_arg == required_argument) && (optarg == NULL)) { /* - * Missing argument; leading ':' indicates no error - * should be generated. - */ + * Missing argument; leading ':' indicates no error + * should be generated. + */ if (PRINT_ERROR) warnx(recargstring, current_argv); /* - * XXX: GNU sets optopt to val regardless of flag - */ + * XXX: GNU sets optopt to val regardless of flag + */ if (long_options[match].flag == NULL) optopt = long_options[match].val; else @@ -271,7 +271,7 @@ parse_long_options(char* const* nargv, const char* options, /* * getopt_internal -- - * Parse argc/argv argument vector. Called by user level routines. + * Parse argc/argv argument vector. Called by user level routines. */ static int getopt_internal(int nargc, char* const* nargv, const char* options, @@ -285,19 +285,19 @@ getopt_internal(int nargc, char* const* nargv, const char* options, return (-1); /* - * XXX Some GNU programs (like cvs) set optind to 0 instead of - * XXX using optreset. Work around this braindamage. - */ + * XXX Some GNU programs (like cvs) set optind to 0 instead of + * XXX using optreset. Work around this braindamage. + */ if (optind == 0) optind = optreset = 1; /* - * Disable GNU extensions if POSIXLY_CORRECT is set or options - * string begins with a '+'. - * - * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or - * optreset != 0 for GNU compatibility. - */ + * Disable GNU extensions if POSIXLY_CORRECT is set or options + * string begins with a '+'. + * + * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or + * optreset != 0 for GNU compatibility. + */ if (posixly_correct == -1 || optreset != 0) posixly_correct = (getenv("POSIXLY_CORRECT") != NULL); if (*options == '-') @@ -322,9 +322,9 @@ getopt_internal(int nargc, char* const* nargv, const char* options, optind -= nonopt_end - nonopt_start; } else if (nonopt_start != -1) { /* - * If we skipped non-options, set optind - * to the first of them. - */ + * If we skipped non-options, set optind + * to the first of them. + */ optind = nonopt_start; } nonopt_start = nonopt_end = -1; @@ -334,17 +334,17 @@ getopt_internal(int nargc, char* const* nargv, const char* options, place = EMSG; /* found non-option */ if (flags & FLAG_ALLARGS) { /* - * GNU extension: - * return non-option as argument to option 1 - */ + * GNU extension: + * return non-option as argument to option 1 + */ optarg = nargv[optind++]; return (INORDER); } if (!(flags & FLAG_PERMUTE)) { /* - * If no permutation wanted, stop parsing - * at first non-option. - */ + * If no permutation wanted, stop parsing + * at first non-option. + */ return (-1); } /* do permutation */ @@ -364,15 +364,15 @@ getopt_internal(int nargc, char* const* nargv, const char* options, nonopt_end = optind; /* - * If we have "-" do nothing, if "--" we are done. - */ + * If we have "-" do nothing, if "--" we are done. + */ if (place[1] != '\0' && *++place == '-' && place[1] == '\0') { optind++; place = EMSG; /* - * We found an option (--), so if we skipped - * non-options, we have to permute. - */ + * We found an option (--), so if we skipped + * non-options, we have to permute. + */ if (nonopt_end != -1) { permute_args(nonopt_start, nonopt_end, optind, nargv); @@ -384,11 +384,11 @@ getopt_internal(int nargc, char* const* nargv, const char* options, } /* - * Check long options if: - * 1) we were passed some - * 2) the arg is not just "-" - * 3) either the arg starts with -- we are getopt_long_only() - */ + * Check long options if: + * 1) we were passed some + * 2) the arg is not just "-" + * 3) either the arg starts with -- we are getopt_long_only() + */ if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY))) { short_too = 0; if (*place == '-') @@ -406,10 +406,10 @@ getopt_internal(int nargc, char* const* nargv, const char* options, if ((optchar = (int)*place++) == (int)':' || (optchar == (int)'-' && *place != '\0') || (oli = (char*)strchr(options, optchar)) == NULL) { /* - * If the user specified "-" and '-' isn't listed in - * options, return -1 (non-option) as per POSIX. - * Otherwise, it is an unknown option character (or ':'). - */ + * If the user specified "-" and '-' isn't listed in + * options, return -1 (non-option) as per POSIX. + * Otherwise, it is an unknown option character (or ':'). + */ if (optchar == (int)'-' && *place == '\0') return (-1); if (!*place) @@ -462,7 +462,7 @@ getopt_internal(int nargc, char* const* nargv, const char* options, /* * getopt_long -- - * Parse argc/argv argument vector. + * Parse argc/argv argument vector. */ int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx) @@ -474,7 +474,7 @@ int getopt_long(int nargc, char* const* nargv, const char* options, /* * getopt_long_only -- - * Parse argc/argv argument vector. + * Parse argc/argv argument vector. */ int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx) diff --git a/src/Detector/darknet/src/getopt.h b/src/Detector/darknet/src/getopt.h index 8266c734e..a9f913d4f 100644 --- a/src/Detector/darknet/src/getopt.h +++ b/src/Detector/darknet/src/getopt.h @@ -75,12 +75,12 @@ extern "C" { #define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ -//extern int optind; /* index of first non-option in argv */ -//extern int optopt; /* single option character, as parsed */ -//extern int opterr; /* flag to enable built-in diagnostics... */ -// /* (user may set to zero, to suppress) */ +//extern int optind; /* index of first non-option in argv */ +//extern int optopt; /* single option character, as parsed */ +//extern int opterr; /* flag to enable built-in diagnostics... */ +// /* (user may set to zero, to suppress) */ // -//extern char *optarg; /* pointer to argument of current option */ +//extern char *optarg; /* pointer to argument of current option */ #define PRINT_ERROR ((opterr) && (*options != ':')) @@ -145,7 +145,7 @@ static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char #ifdef REPLACE_GETOPT /* * getopt -- - * Parse argc/argv argument vector. + * Parse argc/argv argument vector. * * [eventually this will replace the BSD getopt] */ @@ -187,26 +187,26 @@ extern "C" { /* * parse_long_options -- - * Parse long options in argc/argv argument vector. + * Parse long options in argc/argv argument vector. * Returns -1 if short_too is set and the option does not match long_options. */ /* static int parse_long_options(char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too); */ /* * getopt_internal -- - * Parse argc/argv argument vector. Called by user level routines. + * Parse argc/argv argument vector. Called by user level routines. */ /* static int getopt_internal(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags); */ /* * getopt_long -- - * Parse argc/argv argument vector. + * Parse argc/argv argument vector. */ int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx); /* * getopt_long_only -- - * Parse argc/argv argument vector. + * Parse argc/argv argument vector. */ int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx); diff --git a/src/Detector/darknet/src/go.c b/src/Detector/darknet/src/go.c index 1c7a1a083..5107125e7 100644 --- a/src/Detector/darknet/src/go.c +++ b/src/Detector/darknet/src/go.c @@ -142,7 +142,7 @@ void train_go(char *cfgfile, char *weightfile) float loss = train_network_datum(net, board, move) / net.batch; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; @@ -580,9 +580,9 @@ void engine_go(char *filename, char *weightfile, int multi) fprintf(f, "final_status_list dead\n"); fclose(f); #ifdef _WIN32 - FILE *p = _popen("./gnugo --mode gtp < game.txt", "r"); + FILE *p = _popen("./gnugo --mode gtp < game.txt", "r"); #else - FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); #endif for(i = 0; i < count; ++i){ free(fgetl(p)); @@ -721,9 +721,9 @@ float score_game(float *board) fprintf(f, "final_score\n"); fclose(f); #ifdef _WIN32 - FILE *p = _popen("./gnugo --mode gtp < game.txt", "r"); + FILE *p = _popen("./gnugo --mode gtp < game.txt", "r"); #else - FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); + FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); #endif for(i = 0; i < count; ++i){ free(fgetl(p)); @@ -740,9 +740,9 @@ float score_game(float *board) } if(player == 'W') score = -score; #ifdef _WIN32 - _pclose(p); + _pclose(p); #else - pclose(p); + pclose(p); #endif return score; } diff --git a/src/Detector/darknet/src/http_stream.cpp b/src/Detector/darknet/src/http_stream.cpp index 72cc1c6e9..cac00a09d 100644 --- a/src/Detector/darknet/src/http_stream.cpp +++ b/src/Detector/darknet/src/http_stream.cpp @@ -508,7 +508,7 @@ class MJPG_sender } char head[400]; - sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); + sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %d\r\n\r\n", outlen); _write(s, head, 0); int n = _write(s, (char*)(&outbuf[0]), outlen); cerr << "known client: " << s << ", sent = " << n << ", must be sent outlen = " << outlen << endl; @@ -782,8 +782,7 @@ int check_prob(detection det, float thresh) int check_classes_id(detection det1, detection det2, float thresh) { if (det1.classes != det2.classes) { - printf(" Error: det1.classes != det2.classes \n"); - getchar(); + error("Error: det1.classes != det2.classes", DARKNET_LOC); } int det1_id = -1; @@ -832,7 +831,7 @@ float *make_float_array(float* src, size_t size) } struct detection_t : detection { - int det_count = 0; + int det_count; detection_t(detection det) : detection(det), det_count(0) { if (embeddings) embeddings = make_float_array(det.embeddings, embedding_size); @@ -840,7 +839,7 @@ struct detection_t : detection { if (uc) uc = make_float_array(det.uc, 4); } - detection_t(detection_t const& det) : detection(det), det_count(0) + detection_t(detection_t const& det) : detection(det) { if (embeddings) embeddings = make_float_array(det.embeddings, embedding_size); if (prob) prob = make_float_array(det.prob, classes); diff --git a/src/Detector/darknet/src/im2col_kernels.cu b/src/Detector/darknet/src/im2col_kernels.cu index f924b3e5c..ac7ccc84c 100644 --- a/src/Detector/darknet/src/im2col_kernels.cu +++ b/src/Detector/darknet/src/im2col_kernels.cu @@ -206,8 +206,8 @@ void im2col_align_ongpu(float *im, int height_col = (height + 2 * pad - ksize) / stride + 1; int width_col = (width + 2 * pad - ksize) / stride + 1; int num_kernels = channels * height_col * width_col; - im2col_align_gpu_kernel << <(num_kernels + BLOCK - 1) / BLOCK, - BLOCK, 0, get_cuda_stream() >> >( + im2col_align_gpu_kernel <<<(num_kernels + BLOCK - 1) / BLOCK, + BLOCK, 0, get_cuda_stream() >>>( num_kernels, im, height, width, ksize, pad, stride, height_col, width_col, data_col, bit_align); @@ -334,9 +334,9 @@ void im2col_align_bin_ongpu(float *im, int num_kernels = channels * ksize * ksize; int num_blocks = num_kernels / BLOCK + 1; - //im2col_align_bin_gpu_kernel << <(num_kernels + BLOCK - 1) / BLOCK, - im2col_align_bin_gpu_kernel << > >( + //im2col_align_bin_gpu_kernel <<<(num_kernels + BLOCK - 1) / BLOCK, + im2col_align_bin_gpu_kernel <<>>( num_kernels, im, height, width, ksize, channels, pad, stride, height_col, width_col, data_col, bit_align); @@ -500,7 +500,7 @@ __device__ void transpose8rS32_reversed_diagonale(unsigned char* A, unsigned cha B[7 * n] = reverse_byte_CUDA(x >> 24); B[6 * n] = reverse_byte_CUDA(x >> 16); B[5 * n] = reverse_byte_CUDA(x >> 8); B[4 * n] = reverse_byte_CUDA(x); B[3 * n] = reverse_byte_CUDA(y >> 24); B[2 * n] = reverse_byte_CUDA(y >> 16); B[1 * n] = reverse_byte_CUDA(y >> 8); B[0 * n] = reverse_byte_CUDA(y); - //__device__ ​ unsigned int __brev(unsigned int x) + //__device__ ​ unsigned int __brev(unsigned int x) //Reverse the bit order of a 32 bit unsigned integer. // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html } @@ -648,8 +648,8 @@ void transpose_bin_gpu(unsigned char *A, unsigned char *B, const int n, const in int size32 = n*m / (32*32) + 1; //const int num_blocks = size / BLOCK + 1; const int num_blocks32 = size32 / BLOCK_TRANSPOSE32 + 1; - transpose_bin_gpu_kernel_32 << > >((uint32_t *)A, (uint32_t *)B, n, m, lda, ldb, block_size); - //transpose_bin_gpu_kernel << > >(A, B, n, m, lda, ldb, block_size); + transpose_bin_gpu_kernel_32 <<>>((uint32_t *)A, (uint32_t *)B, n, m, lda, ldb, block_size); + //transpose_bin_gpu_kernel <<>>(A, B, n, m, lda, ldb, block_size); CHECK_CUDA(cudaPeekAtLastError()); } // -------------------------------- @@ -676,7 +676,7 @@ void transpose_uint32_gpu(uint32_t *src, uint32_t *dst, int src_h, int src_w, in { int size = src_w * src_h; const int num_blocks = size / BLOCK + 1; - transpose_uint32_kernel << > >(src, dst, src_h, src_w, src_align, dst_align); + transpose_uint32_kernel <<>>(src, dst, src_h, src_w, src_align, dst_align); CHECK_CUDA(cudaPeekAtLastError()); } // -------------------------------- @@ -742,7 +742,7 @@ void transpose_uint32_gpu_2(uint32_t *src, uint32_t *dst, int src_h, int src_w, int size = src_w_align * src_h_align; int num_blocks = size / TRANS_BLOCK; - transpose_uint32_kernel_2 << > >(src, dst, src_h, src_w, src_align, dst_align); + transpose_uint32_kernel_2 <<>>(src, dst, src_h, src_w, src_align, dst_align); CHECK_CUDA(cudaPeekAtLastError()); } // -------------------------------- @@ -780,7 +780,7 @@ void repack_input_gpu(float *input, float *re_packed_input, int w, int h, int c) { int size = w * h * c; const int num_blocks = size / BLOCK + 1; - repack_input_kernel << > >(input, re_packed_input, w, h, c); + repack_input_kernel <<>>(input, re_packed_input, w, h, c); CHECK_CUDA(cudaPeekAtLastError()); } // -------------------------------- @@ -820,7 +820,7 @@ void repack_input_gpu_2(float *input, float *re_packed_input, int w, int h, int { int size = w * h * c; const int num_blocks = size / BLOCK + 1; - repack_input_kernel_2 << > >(input, re_packed_input, w, h, c); + repack_input_kernel_2 <<>>(input, re_packed_input, w, h, c); CHECK_CUDA(cudaPeekAtLastError()); } // -------------------------------- @@ -870,7 +870,7 @@ void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, in const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(size, block_size); //printf("\n num_blocks = %d, num_blocks/32 = %d, block_size = %d \n", num_blocks, num_blocks / 32, block_size); - repack_input_kernel_bin << > >(input, re_packed_input_bin, w, h, c); + repack_input_kernel_bin <<>>(input, re_packed_input_bin, w, h, c); CHECK_CUDA(cudaPeekAtLastError()); } @@ -921,7 +921,7 @@ void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, in const int block_size = 256;// 128; const int num_blocks = get_number_of_blocks(size, block_size); printf("\n num_blocks = %d, num_blocks/32 = %d, block_size = %d \n", num_blocks, num_blocks/32, block_size); - repack_input_kernel_bin << > >(input, re_packed_input_bin, w, h, c); + repack_input_kernel_bin <<>>(input, re_packed_input_bin, w, h, c); CHECK_CUDA(cudaPeekAtLastError()); } */ @@ -1351,17 +1351,17 @@ __global__ void gemm_nn_custom_bin_mean_transposed_tensor_kernel(int M, int N, i // wmma::bmma_sync(c_frag, a_frag, b_frag, c_frag); int32_t c_val[8]; // 8 x 32 threads = 256 - #pragma UNROLL + #pragma unroll for (int local_j = 0; local_j < 8; ++local_j) { uint32_t b_val_cur = __shfl_custom(b_val, local_j * 4 + k_d); c_val[local_j] = __popc(xor_int32(a_val, b_val_cur)); } - #pragma UNROLL + #pragma unroll for (int local_j = 0; local_j < 8; ++local_j) { - #pragma UNROLL + #pragma unroll for (int local_k = 0; local_k < 4; ++local_k) { accum_c_val[local_j + c_x*8] += __shfl_custom(c_val[local_j], i_d * 4 + local_k); } @@ -1386,7 +1386,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_tensor_kernel(int M, int N, i { int j_d = lane_id % WMMA_N; { - #pragma UNROLL + #pragma unroll for (int i_d = lane_id / WMMA_N; i_d < WMMA_M; i_d += WMMA_M / 2) { int count = C_s[warp_id*WMMA_M*WMMA_N + i_d*WMMA_N + j_d + WMMA_M*WMMA_N*32*c_x]; @@ -1506,17 +1506,17 @@ __global__ void gemm_nn_custom_bin_mean_transposed_tensor_kernel(int M, int N, i // wmma::bmma_sync(c_frag, a_frag, b_frag, c_frag); int32_t c_val[8]; // 8 x 32 threads = 256 - #pragma UNROLL + #pragma unroll for (int local_j = 0; local_j < 8; ++local_j) { uint32_t b_val_cur = __shfl_custom(b_val, local_j *4 + k_d); c_val[local_j] = __popc(xor_int32(a_val, b_val_cur)); } - #pragma UNROLL + #pragma unroll for (int local_j = 0; local_j < 8; ++local_j) { - #pragma UNROLL + #pragma unroll for (int local_k = 0; local_k < 4; ++local_k) { accum_c_val[local_j] += __shfl_custom(c_val[local_j], i_d * 4 + local_k); } @@ -1750,8 +1750,8 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#wmma-subbyte // nvcuda::wmma::col_major -> cutlass::MatrixLayout::kColumnMajor (matrix is not transposed) -// Matrix A Matrix B Accumulator Matrix Size (m-n-k) -// precision::b1 precision::b1 int 8x8x128 +// Matrix A Matrix B Accumulator Matrix Size (m-n-k) +// precision::b1 precision::b1 int 8x8x128 // The only dimensions currently supported by WMMA for XNOR // const int WMMA_M = 8; @@ -1791,7 +1791,7 @@ void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K, //printf(" lda = %d, ldb = %d, ldc = %d, lda/32 = %d, ldb/32 = %d, ldc/32 = %d \n", lda, ldb, ldc, lda / 32, ldb / 32, ldc / 32); //printf(" l.c (K/9) = %d, M (l.n) = %d \n", (K%9 == 0)? K / 9: K, M); - gemm_nn_custom_bin_mean_transposed_tensor_kernel << > > ( + gemm_nn_custom_bin_mean_transposed_tensor_kernel <<>> ( M, N, K, A, lda, B, ldb, @@ -1800,12 +1800,11 @@ void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K, shortcut_in_gpu, shortcut_out_gpu); //cudaDeviceSynchronize(); - //getchar(); } else #endif //# CUDART_VERSION >= 10000 { - gemm_nn_custom_bin_mean_transposed_gpu_kernel << > > ( + gemm_nn_custom_bin_mean_transposed_gpu_kernel <<>> ( M, N, K, A, lda, B, ldb, @@ -1993,7 +1992,7 @@ void convolve_gpu(float *input, float *weights, float *output, int in_w, int in_ const int num_blocks = array_size / BLOCK + 1; //printf("\n array_size = %d, num_blocks = %d, w = %d, h = %d, n = %d, c = %d, pad = %d \n", array_size, num_blocks, in_w, in_h, n, in_c, pad); - convolve_gpu_kernel << > > (input, weights, output, in_w, in_h, in_c, n, size, pad); + convolve_gpu_kernel <<>> (input, weights, output, in_w, in_h, in_c, n, size, pad); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2209,7 +2208,7 @@ void convolve_bin_gpu(float *input, float *weights, float *output, int in_w, int const int num_blocks = array_size / BLOCK + 1; //printf("\n array_size = %d, num_blocks = %d, w = %d, h = %d, n = %d, c = %d, pad = %d \n", array_size, num_blocks, in_w, in_h, n, in_c, pad); - convolve_bin_gpu_kernel << > > (input, weights, output, in_w, in_h, in_c, n, size, pad, new_lda, mean_arr_gpu); + convolve_bin_gpu_kernel <<>> (input, weights, output, in_w, in_h, in_c, n, size, pad, new_lda, mean_arr_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -2278,11 +2277,11 @@ void im2col_gpu_ext(const float* data_im, const int channels, (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; int num_kernels = channels * height_col * width_col; // NOLINT_NEXT_LINE(whitespace/operators) - im2col_gpu_kernel_ext << > >( + im2col_gpu_kernel_ext <<>>( num_kernels, data_im, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col, width_col, data_col); CHECK_CUDA(cudaPeekAtLastError()); -} \ No newline at end of file +} diff --git a/src/Detector/darknet/src/image.c b/src/Detector/darknet/src/image.c index 8ef026d33..b238db352 100644 --- a/src/Detector/darknet/src/image.c +++ b/src/Detector/darknet/src/image.c @@ -13,16 +13,13 @@ #ifndef STB_IMAGE_IMPLEMENTATION #define STB_IMAGE_IMPLEMENTATION -#include "stb_image.h" +#include #endif #ifndef STB_IMAGE_WRITE_IMPLEMENTATION #define STB_IMAGE_WRITE_IMPLEMENTATION -#include "stb_image_write.h" +#include #endif -extern int check_mistakes; -//int windows = 0; - float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; float get_color(int c, int x, int max) @@ -280,6 +277,19 @@ image **load_alphabet() return alphabets; } +void free_alphabet(image **alphabet) +{ + int i, j; + const int nsize = 8; + for (j = 0; j < nsize; ++j) { + for (i = 32; i < 127; ++i) { + free_image(alphabet[j][i]); + } + free(alphabet[j]); + } + free(alphabet); +} + // Creates array of detections with prob > thresh and fills best_class for them @@ -705,7 +715,7 @@ void show_image(image p, const char *name) #ifdef OPENCV show_image_cv(p, name); #else - fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); + fprintf(stderr, "Not compiled with OpenCV, saving to %s.jpg instead\n", name); save_image(p, name); #endif // OPENCV } @@ -1498,12 +1508,7 @@ image load_image_stb(char *filename, int channels) char *new_line = "\n"; fwrite(new_line, sizeof(char), strlen(new_line), fw); fclose(fw); - if (check_mistakes) { - printf("\n Error in load_image_stb() \n"); - getchar(); - } return make_image(10, 10, 3); - //exit(EXIT_FAILURE); } if(channels) c = channels; int i,j,k; @@ -1523,7 +1528,7 @@ image load_image_stb(char *filename, int channels) image load_image_stb_resize(char *filename, int w, int h, int c) { - image out = load_image_stb(filename, c); // without OpenCV + image out = load_image_stb(filename, c); if ((h && w) && (h != out.h || w != out.w)) { image resized = resize_image(out, w, h); @@ -1536,10 +1541,9 @@ image load_image_stb_resize(char *filename, int w, int h, int c) image load_image(char *filename, int w, int h, int c) { #ifdef OPENCV - //image out = load_image_stb(filename, c); image out = load_image_cv(filename, c); #else - image out = load_image_stb(filename, c); // without OpenCV + image out = load_image_stb(filename, c); #endif // OPENCV if((h && w) && (h != out.h || w != out.w)){ diff --git a/src/Detector/darknet/src/image.h b/src/Detector/darknet/src/image.h index 90e6a0486..65ccb7c71 100644 --- a/src/Detector/darknet/src/image.h +++ b/src/Detector/darknet/src/image.h @@ -88,6 +88,7 @@ image load_image(char *filename, int w, int h, int c); image load_image_stb_resize(char *filename, int w, int h, int c); //LIB_API image load_image_color(char *filename, int w, int h); image **load_alphabet(); +void free_alphabet(image **alphabet); //float get_pixel(image m, int x, int y, int c); //float get_pixel_extend(image m, int x, int y, int c); diff --git a/src/Detector/darknet/src/image_opencv.cpp b/src/Detector/darknet/src/image_opencv.cpp index c72b0efe4..974a5144d 100644 --- a/src/Detector/darknet/src/image_opencv.cpp +++ b/src/Detector/darknet/src/image_opencv.cpp @@ -113,7 +113,6 @@ extern "C" mat_cv *load_image_mat_cv(const char *filename, int flag) cerr << "Cannot load image " << shrinked_filename << std::endl; std::ofstream bad_list("bad.list", std::ios::out | std::ios::app); bad_list << shrinked_filename << std::endl; - //if (check_mistakes) getchar(); return NULL; } cv::Mat dst; @@ -1161,12 +1160,12 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz if (iteration_old == 0) cv::putText(img, accuracy_name, cv::Point(10, 12), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 0, 0), 1, CV_AA); - if (iteration_old != 0){ - cv::line(img, + if (iteration_old != 0){ + cv::line(img, cv::Point(img_offset + draw_size * (float)iteration_old / max_batches, draw_size * (1 - old_precision)), cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - precision)), CV_RGB(255, 0, 0), 1, 8, 0); - } + } sprintf(char_buff, "%2.1f%% ", precision * 100); cv::putText(img, char_buff, cv::Point(10, 28), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 255, 255), 5, CV_AA); @@ -1347,8 +1346,8 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h, // Mat -> image out = mat_to_image(sized); } - catch (...) { - cerr << "OpenCV can't augment image: " << w << " x " << h << " \n"; + catch (const std::exception& e) { + cerr << "OpenCV can't augment image: " << w << " x " << h << " \n" << e.what() << " \n"; out = mat_to_image(*(cv::Mat*)mat); } return out; diff --git a/src/Detector/darknet/src/layer.c b/src/Detector/darknet/src/layer.c index 032a24e0f..758644b56 100644 --- a/src/Detector/darknet/src/layer.c +++ b/src/Detector/darknet/src/layer.c @@ -92,9 +92,9 @@ void free_layer_custom(layer l, int keep_cudnn_desc) if (l.bias_updates) free(l.bias_updates), l.bias_updates = NULL; if (l.scales) free(l.scales), l.scales = NULL; if (l.scale_updates) free(l.scale_updates), l.scale_updates = NULL; - if (l.biases_ema) free(l.biases_ema), l.biases = NULL; - if (l.scales_ema) free(l.scales_ema), l.scales = NULL; - if (l.weights_ema) free(l.weights_ema), l.weights = NULL; + if (l.biases_ema) free(l.biases_ema), l.biases_ema = NULL; + if (l.scales_ema) free(l.scales_ema), l.scales_ema = NULL; + if (l.weights_ema) free(l.weights_ema), l.weights_ema = NULL; if (l.weights) free(l.weights), l.weights = NULL; if (l.weight_updates) free(l.weight_updates), l.weight_updates = NULL; if (l.align_bit_weights) free(l.align_bit_weights); diff --git a/src/Detector/darknet/src/layer.h b/src/Detector/darknet/src/layer.h index e92d3b4a9..5b3d228b1 100644 --- a/src/Detector/darknet/src/layer.h +++ b/src/Detector/darknet/src/layer.h @@ -36,10 +36,10 @@ extern "C" { // NETWORK, // XNOR, // REGION, -// YOLO, +// YOLO, // REORG, -// UPSAMPLE, -// REORG_OLD, +// UPSAMPLE, +// REORG_OLD, // BLANK //} LAYER_TYPE; @@ -48,15 +48,15 @@ extern "C" { //} COST_TYPE; //typedef struct { -// int batch; -// float learning_rate; -// float momentum; -// float decay; -// int adam; -// float B1; -// float B2; -// float eps; -// int t; +// int batch; +// float learning_rate; +// float momentum; +// float decay; +// int adam; +// float B1; +// float B2; +// float eps; +// int t; //} update_args; /* @@ -87,7 +87,7 @@ struct layer{ int side; int stride; int reverse; - int spatial; + int spatial; int pad; int sqrt; int flip; @@ -105,8 +105,8 @@ struct layer{ float shift; float ratio; float learning_rate_scale; - int focal_loss; - int noloss; + int focal_loss; + int noloss; int softmax; int classes; int coords; @@ -118,10 +118,10 @@ struct layer{ int noadjust; int reorg; int log; - int tanh; - int *mask; - int total; - float bflops; + int tanh; + int *mask; + int total; + float bflops; int adam; float B1; @@ -146,14 +146,14 @@ struct layer{ float coord_scale; float object_scale; float noobject_scale; - float mask_scale; + float mask_scale; float class_scale; int bias_match; int random; - float ignore_thresh; - float truth_thresh; + float ignore_thresh; + float truth_thresh; float thresh; - float focus; + float focus; int classfix; int absolute; @@ -208,7 +208,7 @@ struct layer{ int * input_sizes; float * delta; float * output; - float * loss; + float * loss; float * squared; float * norms; @@ -298,8 +298,8 @@ struct layer{ float * weights_gpu; float * weight_updates_gpu; - float * weights_gpu16; - float * weight_updates_gpu16; + float * weights_gpu16; + float * weight_updates_gpu16; float * biases_gpu; float * bias_updates_gpu; @@ -308,7 +308,7 @@ struct layer{ float * scale_updates_gpu; float * output_gpu; - float * loss_gpu; + float * loss_gpu; float * delta_gpu; float * rand_gpu; float * squared_gpu; @@ -318,7 +318,7 @@ struct layer{ cudnnTensorDescriptor_t srcTensorDesc16, dstTensorDesc16; cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; cudnnTensorDescriptor_t dsrcTensorDesc16, ddstTensorDesc16; - cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc, normDstTensorDescF16; + cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc, normDstTensorDescF16; cudnnFilterDescriptor_t weightDesc, weightDesc16; cudnnFilterDescriptor_t dweightDesc, dweightDesc16; cudnnConvolutionDescriptor_t convDesc; diff --git a/src/Detector/darknet/src/maxpool_layer.c b/src/Detector/darknet/src/maxpool_layer.c index 89ae55d46..0e7dfd5b5 100644 --- a/src/Detector/darknet/src/maxpool_layer.c +++ b/src/Detector/darknet/src/maxpool_layer.c @@ -142,7 +142,7 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s else cudnn_maxpool_setup(&l); #endif // GPU - l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; + l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; if (avgpool) { if (stride_x == stride_y) fprintf(stderr, "avg %2dx%2d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); @@ -411,4 +411,4 @@ void backward_local_avgpool_layer(const maxpool_layer l, network_state state) } } -} \ No newline at end of file +} diff --git a/src/Detector/darknet/src/maxpool_layer_kernels.cu b/src/Detector/darknet/src/maxpool_layer_kernels.cu index ab39d6b57..1d0d1bd9d 100644 --- a/src/Detector/darknet/src/maxpool_layer_kernels.cu +++ b/src/Detector/darknet/src/maxpool_layer_kernels.cu @@ -155,7 +155,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta size_t n = h*w*c*layer.batch; - forward_maxpool_depth_layer_kernel << > >( + forward_maxpool_depth_layer_kernel <<>>( n, layer.w, layer.h, layer.c, layer.out_c, layer.batch, state.input, layer.output_gpu, layer.indexes_gpu); CHECK_CUDA(cudaPeekAtLastError()); @@ -192,11 +192,11 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta size_t n = h*w*c*layer.batch; - forward_maxpool_layer_kernel << > > (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); + forward_maxpool_layer_kernel <<>> (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); CHECK_CUDA(cudaPeekAtLastError()); if (layer.maxpool_zero_nonmax) { - forward_zero_nonmax_kernel << > > (n, state.input, layer.output_gpu); + forward_zero_nonmax_kernel <<>> (n, state.input, layer.output_gpu); CHECK_CUDA(cudaPeekAtLastError()); } } @@ -237,7 +237,7 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st size_t n = h * w * c * layer.batch; - backward_maxpool_depth_layer_kernel << > >(n, layer.w, layer.h, layer.c, layer.batch, layer.delta_gpu, state.delta, layer.indexes_gpu); + backward_maxpool_depth_layer_kernel <<>>(n, layer.w, layer.h, layer.c, layer.batch, layer.delta_gpu, state.delta, layer.indexes_gpu); CHECK_CUDA(cudaPeekAtLastError()); return; } @@ -248,7 +248,7 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st CHECK_CUDA(cudaPeekAtLastError()); if (layer.maxpool_zero_nonmax) { - backward_zero_nonmax_kernel << > > (n, layer.indexes_gpu, state.delta); + backward_zero_nonmax_kernel <<>> (n, layer.indexes_gpu, state.delta); CHECK_CUDA(cudaPeekAtLastError()); } } @@ -373,7 +373,7 @@ extern "C" void forward_local_avgpool_layer_gpu(maxpool_layer layer, network_sta size_t n = h*w*c*layer.batch; - forward_local_avgpool_layer_kernel << > > (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu); + forward_local_avgpool_layer_kernel <<>> (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu); CHECK_CUDA(cudaPeekAtLastError()); } } @@ -382,6 +382,6 @@ extern "C" void backward_local_avgpool_layer_gpu(maxpool_layer layer, network_st { size_t n = layer.h*layer.w*layer.c*layer.batch; - backward_local_avgpool_layer_kernel << > >(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, layer.delta_gpu, state.delta); + backward_local_avgpool_layer_kernel <<>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, layer.delta_gpu, state.delta); CHECK_CUDA(cudaPeekAtLastError()); } diff --git a/src/Detector/darknet/src/network.c b/src/Detector/darknet/src/network.c index 92379f053..40c1cbcb7 100644 --- a/src/Detector/darknet/src/network.c +++ b/src/Detector/darknet/src/network.c @@ -776,6 +776,21 @@ float *network_predict(network net, float *input) return out; } +#ifdef CUDA_OPENGL_INTEGRATION +float *network_predict_gl_texture(network *net, uint32_t texture_id) +{ + if(net->batch != 1) { + set_batch_network(net, 1); + } + + if(gpu_index >= 0) { + return network_predict_gpu_gl_texture(*net, texture_id); + } + + return NULL; +} +#endif // CUDA_OPENGL_INTEGRATION + int num_detections(network *net, float thresh) { int i; @@ -889,8 +904,8 @@ void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, f dets[j].best_class_idx = -1; for (i = 0; i < l.classes; ++i) { if (probs[j][i] > highest_prob) { - highest_prob = probs[j][i]; - dets[j].best_class_idx = i; + highest_prob = probs[j][i]; + dets[j].best_class_idx = i; } dets[j].prob[i] = probs[j][i]; } @@ -1033,7 +1048,7 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names, send_buf = (char *)realloc(send_buf, total_len * sizeof(char)); if (!send_buf) { if (buf) free(buf); - return 0;// exit(-1); + return 0; } strcat(send_buf, buf); free(buf); diff --git a/src/Detector/darknet/src/network.h b/src/Detector/darknet/src/network.h index 7661c8ef8..c8a770642 100644 --- a/src/Detector/darknet/src/network.h +++ b/src/Detector/darknet/src/network.h @@ -1,6 +1,17 @@ // Oh boy, why am I about to do this.... #ifndef NETWORK_H #define NETWORK_H + +/* + * Necessary in C++ to get format macros out of inttypes.h + */ +#ifdef __cplusplus +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS 1 +#endif +#endif +#include + #include "darknet.h" #include @@ -23,7 +34,7 @@ typedef struct network{ float *workspace; int n; int batch; - uint64_t *seen; + uint64_t *seen; float epoch; int subdivisions; float momentum; @@ -61,7 +72,7 @@ typedef struct network{ float exposure; float saturation; float hue; - int small_object; + int small_object; int gpu_index; tree *hierarchy; @@ -71,11 +82,11 @@ typedef struct network{ float **input_gpu; float **truth_gpu; - float **input16_gpu; - float **output16_gpu; - size_t *max_input16_size; - size_t *max_output16_size; - int wait_stream; + float **input16_gpu; + float **output16_gpu; + size_t *max_input16_size; + size_t *max_output16_size; + int wait_stream; #endif } network; @@ -96,6 +107,7 @@ float train_networks(network *nets, int n, data d, int interval); void sync_nets(network *nets, int n, int interval); float train_network_datum_gpu(network net, float *x, float *y); float *network_predict_gpu(network net, float *input); +float *network_predict_gpu_gl_texture(network net, uint32_t texture_id); float * get_network_output_gpu_layer(network net, int i); float * get_network_delta_gpu_layer(network net, int i); float *get_network_output_gpu(network net); @@ -144,7 +156,7 @@ int get_predicted_class_network(network net); void print_network(network net); void visualize_network(network net); int resize_network(network *net, int w, int h); -void set_batch_network(network *net, int b); +//LIB_API void set_batch_network(network *net, int b); int get_network_input_size(network net); float get_network_cost(network net); //LIB_API layer* get_network_layer(network* net, int i); diff --git a/src/Detector/darknet/src/network_kernels.cu b/src/Detector/darknet/src/network_kernels.cu index 7fbc03cf5..3b3d4e6fe 100644 --- a/src/Detector/darknet/src/network_kernels.cu +++ b/src/Detector/darknet/src/network_kernels.cu @@ -222,7 +222,7 @@ void backward_network_gpu(network net, network_state state) int state_input_nan_inf = is_nan_or_inf(state.input, l.outputs * l.batch); printf("\n i - %d is_nan_or_inf(s.delta) = %d \n", i, state_delta_nan_inf); printf(" i - %d is_nan_or_inf(s.input) = %d \n", i, state_input_nan_inf); - if (state_delta_nan_inf || state_input_nan_inf) { printf(" found "); getchar(); } + if (state_delta_nan_inf || state_input_nan_inf) { printf(" found "); } } */ } @@ -376,7 +376,7 @@ float train_network_datum_gpu(network net, float *x, float *y) float scale = (get_current_iteration(net) / ((float)net.max_batches)); //scale = sin(scale * M_PI); net.learning_rate = net.adversarial_lr * scale; - layer l = net.layers[net.n - 1]; + //layer l = net.layers[net.n - 1]; int y_size = get_network_output_size(net)*net.batch; if (net.layers[net.n - 1].truths) y_size = net.layers[net.n - 1].truths*net.batch; float *truth_cpu = (float *)xcalloc(y_size, sizeof(float)); @@ -747,3 +747,92 @@ float *network_predict_gpu(network net, float *input) //cuda_free(state.input); // will be freed in the free_network() return out; } + +#ifdef CUDA_OPENGL_INTEGRATION +float *network_predict_gpu_gl_texture(network net, uint32_t texture_id) +{ + if (net.gpu_index != cuda_get_device()) + cuda_set_device(net.gpu_index); + int size = get_network_input_size(net) * net.batch; + + // Map the OpenGL texture resource so CUDA can access it. + cudaGraphicsResource_t graphics_resource = NULL; + unsigned int flags = cudaGraphicsRegisterFlagsReadOnly; + CHECK_CUDA(cudaGraphicsGLRegisterImage(&graphics_resource, texture_id, GL_TEXTURE_2D, flags)); + CHECK_CUDA(cudaGraphicsMapResources(1, &graphics_resource, 0)); + + //void* dev_ptr = NULL; + cudaArray_t dev_array = NULL; + CHECK_CUDA(cudaGraphicsSubResourceGetMappedArray(&dev_array, graphics_resource, 0, 0)); + + size_t width = net.w; + size_t height = net.h; + size_t pitch = width * sizeof(float); + + CHECK_CUDA(cudaMemcpy2DFromArray( + net.input_state_gpu, // dst + pitch, // dst_pitch + dev_array, // src + 0, // width offset + 0, // height offset + width * sizeof(float), // width (in bytes) + height * net.c, // height (in rows) + cudaMemcpyDeviceToDevice // Transfer type + )); + + network_state state; + state.index = 0; + state.net = net; + state.input = net.input_state_gpu; + state.truth = 0; + state.train = 0; + state.delta = 0; + + //cudaGraphExec_t instance = (cudaGraphExec_t)net.cuda_graph_exec; + static cudaGraphExec_t instance; + + if ((*net.cuda_graph_ready) == 0) { + static cudaGraph_t graph; + if (net.use_cuda_graph == 1) { + int i; + for (i = 0; i < 16; ++i) switch_stream(i); + + cudaStream_t stream0 = switch_stream(0); + CHECK_CUDA(cudaDeviceSynchronize()); + printf("Try to capture graph... \n"); + //cudaGraph_t graph = (cudaGraph_t)net.cuda_graph; + CHECK_CUDA(cudaStreamBeginCapture(stream0, cudaStreamCaptureModeGlobal)); + } + + // cuda_push_array(state.input, net.input_pinned_cpu, size); + forward_network_gpu(net, state); + + if (net.use_cuda_graph == 1) { + cudaStream_t stream0 = switch_stream(0); + CHECK_CUDA(cudaStreamEndCapture(stream0, &graph)); + CHECK_CUDA(cudaGraphInstantiate(&instance, graph, NULL, NULL, 0)); + (*net.cuda_graph_ready) = 1; + printf(" graph is captured... \n"); + CHECK_CUDA(cudaDeviceSynchronize()); + } + CHECK_CUDA(cudaStreamSynchronize(get_cuda_stream())); + } + else { + cudaStream_t stream0 = switch_stream(0); + //printf(" cudaGraphLaunch \n"); + CHECK_CUDA( cudaGraphLaunch(instance, stream0) ); + CHECK_CUDA( cudaStreamSynchronize(stream0) ); + //printf(" ~cudaGraphLaunch \n"); + } + + float *out = get_network_output_gpu(net); + reset_wait_stream_events(); + //cuda_free(state.input); // will be freed in the free_network() + + // Unmap the OpenGL texture. + cudaGraphicsUnmapResources(1, &graphics_resource, 0); + cudaGraphicsUnregisterResource(graphics_resource); + + return out; +} +#endif // CUDA_OPENGL_INTEGRATION diff --git a/src/Detector/darknet/src/option_list.h b/src/Detector/darknet/src/option_list.h index 726b559aa..3dd66e445 100644 --- a/src/Detector/darknet/src/option_list.h +++ b/src/Detector/darknet/src/option_list.h @@ -26,8 +26,8 @@ float option_find_float_quiet(list *l, char *key, float def); void option_unused(list *l); //typedef struct { -// int classes; -// char **names; +// int classes; +// char **names; //} metadata; //LIB_API metadata get_metadata(char *file); diff --git a/src/Detector/darknet/src/parser.c b/src/Detector/darknet/src/parser.c index 91b67c2d5..65606de6e 100644 --- a/src/Detector/darknet/src/parser.c +++ b/src/Detector/darknet/src/parser.c @@ -217,13 +217,11 @@ convolutional_layer parse_convolutional(list *options, size_params params) int stretch = option_find_int_quiet(options, "stretch", 0); int stretch_sway = option_find_int_quiet(options, "stretch_sway", 0); if ((sway + rotate + stretch + stretch_sway) > 1) { - printf(" Error: should be used only 1 param: sway=1, rotate=1 or stretch=1 in the [convolutional] layer \n"); - exit(0); + error("Error: should be used only 1 param: sway=1, rotate=1 or stretch=1 in the [convolutional] layer", DARKNET_LOC); } int deform = sway || rotate || stretch || stretch_sway; if (deform && size == 1) { - printf(" Error: params (sway=1, rotate=1 or stretch=1) should be used only with size >=3 in the [convolutional] layer \n"); - exit(0); + error("Error: params (sway=1, rotate=1 or stretch=1) should be used only with size >=3 in the [convolutional] layer", DARKNET_LOC); } convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer, assisted_excitation, deform, params.train); @@ -362,17 +360,17 @@ connected_layer parse_connected(list *options, size_params params) softmax_layer parse_softmax(list *options, size_params params) { - int groups = option_find_int_quiet(options, "groups", 1); - softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups); - layer.temperature = option_find_float_quiet(options, "temperature", 1); - char *tree_file = option_find_str(options, "tree", 0); - if (tree_file) layer.softmax_tree = read_tree(tree_file); - layer.w = params.w; - layer.h = params.h; - layer.c = params.c; - layer.spatial = option_find_float_quiet(options, "spatial", 0); - layer.noloss = option_find_int_quiet(options, "noloss", 0); - return layer; + int groups = option_find_int_quiet(options, "groups", 1); + softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups); + layer.temperature = option_find_float_quiet(options, "temperature", 1); + char *tree_file = option_find_str(options, "tree", 0); + if (tree_file) layer.softmax_tree = read_tree(tree_file); + layer.w = params.w; + layer.h = params.h; + layer.c = params.c; + layer.spatial = option_find_float_quiet(options, "spatial", 0); + layer.noloss = option_find_int_quiet(options, "noloss", 0); + return layer; } contrastive_layer parse_contrastive(list *options, size_params params) @@ -384,8 +382,7 @@ contrastive_layer parse_contrastive(list *options, size_params params) if(yolo_layer_id != 0) yolo_layer = params.net.layers + yolo_layer_id; if (yolo_layer->type != YOLO) { printf(" Error: [contrastive] layer should point to the [yolo] layer instead of %d layer! \n", yolo_layer_id); - getchar(); - exit(0); + error("Error!", DARKNET_LOC); } contrastive_layer layer = make_contrastive_layer(params.batch, params.w, params.h, params.c, classes, params.inputs, yolo_layer); @@ -427,7 +424,7 @@ float *get_classes_multipliers(char *cpc, const int classes, const float max_del int *counters_per_class = parse_yolo_mask(cpc, &classes_counters); if (classes_counters != classes) { printf(" number of values in counters_per_class = %d doesn't match with classes = %d \n", classes_counters, classes); - exit(0); + error("Error!", DARKNET_LOC); } float max_counter = 0; int i; @@ -458,12 +455,8 @@ layer parse_yolo(list *options, size_params params) int max_boxes = option_find_int_quiet(options, "max", 200); layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); if (l.outputs != params.inputs) { - printf("Error: l.outputs == params.inputs \n"); - printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer \n"); - exit(EXIT_FAILURE); + error("Error: l.outputs == params.inputs, filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer", DARKNET_LOC); } - //assert(l.outputs == params.inputs); - l.show_details = option_find_int_quiet(options, "show_details", 1); l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10 char *cpc = option_find_str(options, "counters_per_class", 0); @@ -532,7 +525,6 @@ layer parse_yolo(list *options, size_params params) printf(" embedding_size = %d \n", l.embedding_size); if (le.n % l.n != 0) { printf(" Warning: filters=%d number in embedding_layer=%d isn't divisable by number of anchors %d \n", le.n, embedding_layer_id, l.n); - getchar(); } } @@ -592,11 +584,8 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 int *mask = parse_gaussian_yolo_mask(a, &num); layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); if (l.outputs != params.inputs) { - printf("Error: l.outputs == params.inputs \n"); - printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [Gaussian_yolo]-layer \n"); - exit(EXIT_FAILURE); + error("Error: l.outputs == params.inputs, filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [Gaussian_yolo]-layer", DARKNET_LOC); } - //assert(l.outputs == params.inputs); l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10 char *cpc = option_find_str(options, "counters_per_class", 0); l.classes_multipliers = get_classes_multipliers(cpc, classes, l.max_delta); @@ -683,9 +672,7 @@ layer parse_region(list *options, size_params params) layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes); if (l.outputs != params.inputs) { - printf("Error: l.outputs == params.inputs \n"); - printf("filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer \n"); - exit(EXIT_FAILURE); + error("Error: l.outputs == params.inputs, filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer", DARKNET_LOC); } //assert(l.outputs == params.inputs); @@ -936,8 +923,7 @@ layer parse_shortcut(list *options, size_params params, network net) else if (strcmp(weights_type_str, "per_channel") == 0) weights_type = PER_CHANNEL; else if (strcmp(weights_type_str, "none") != 0) { printf("Error: Incorrect weights_type = %s \n Use one of: none, per_feature, per_channel \n", weights_type_str); - getchar(); - exit(0); + error("Error!", DARKNET_LOC); } char *weights_normalization_str = option_find_str_quiet(options, "weights_normalization", "none"); @@ -946,8 +932,7 @@ layer parse_shortcut(list *options, size_params params, network net) else if (strcmp(weights_normalization_str, "softmax") == 0) weights_normalization = SOFTMAX_NORMALIZATION; else if (strcmp(weights_type_str, "none") != 0) { printf("Error: Incorrect weights_normalization = %s \n Use one of: none, relu, softmax \n", weights_normalization_str); - getchar(); - exit(0); + error("Error!", DARKNET_LOC); } char *l = option_find(options, "from"); @@ -1227,8 +1212,7 @@ void parse_net_options(list *options, network *net) net->contrastive_color = option_find_int_quiet(options, "contrastive_color", 0); net->unsupervised = option_find_int_quiet(options, "unsupervised", 0); if (net->contrastive && mini_batch < 2) { - printf(" Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss \n"); - exit(0); + error("Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss!", DARKNET_LOC); } net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); net->resize_step = option_find_float_quiet(options, "resize_step", 32); @@ -1837,7 +1821,7 @@ list *read_cfg(char *filename) free(line); break; default: - if(!current || (current && !read_option(line, current->options))){ + if(!read_option(line, current->options)){ fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); free(line); } @@ -2006,12 +1990,12 @@ void save_weights_upto(network net, char *filename, int cutoff, int save_ema) FILE *fp = fopen(filename, "wb"); if(!fp) file_error(filename); - int major = MAJOR_VERSION; - int minor = MINOR_VERSION; - int revision = PATCH_VERSION; - fwrite(&major, sizeof(int), 1, fp); - fwrite(&minor, sizeof(int), 1, fp); - fwrite(&revision, sizeof(int), 1, fp); + int32_t major = MAJOR_VERSION; + int32_t minor = MINOR_VERSION; + int32_t revision = PATCH_VERSION; + fwrite(&major, sizeof(int32_t), 1, fp); + fwrite(&minor, sizeof(int32_t), 1, fp); + fwrite(&revision, sizeof(int32_t), 1, fp); (*net.seen) = get_current_iteration(net) * net.batch * net.subdivisions; // remove this line, when you will save to weights-file both: seen & cur_iteration fwrite(net.seen, sizeof(uint64_t), 1, fp); @@ -2267,12 +2251,12 @@ void load_weights_upto(network *net, char *filename, int cutoff) FILE *fp = fopen(filename, "rb"); if(!fp) file_error(filename); - int major; - int minor; - int revision; - fread(&major, sizeof(int), 1, fp); - fread(&minor, sizeof(int), 1, fp); - fread(&revision, sizeof(int), 1, fp); + int32_t major; + int32_t minor; + int32_t revision; + fread(&major, sizeof(int32_t), 1, fp); + fread(&minor, sizeof(int32_t), 1, fp); + fread(&revision, sizeof(int32_t), 1, fp); if ((major * 10 + minor) >= 2) { printf("\n seen 64"); uint64_t iseen = 0; diff --git a/src/Detector/darknet/src/region_layer.c b/src/Detector/darknet/src/region_layer.c index b7aba32e1..506cc9fc6 100644 --- a/src/Detector/darknet/src/region_layer.c +++ b/src/Detector/darknet/src/region_layer.c @@ -306,7 +306,6 @@ void forward_region_layer(const region_layer l, network_state state) int class_id = state.truth[t * l.truth_size + b*l.truths + 4]; if (class_id >= l.classes) { printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1); - getchar(); continue; // if label contains class_id more than number of classes in the cfg-file } diff --git a/src/Detector/darknet/src/representation_layer.c b/src/Detector/darknet/src/representation_layer.c index fe7741fa9..3efe74f58 100644 --- a/src/Detector/darknet/src/representation_layer.c +++ b/src/Detector/darknet/src/representation_layer.c @@ -155,5 +155,3 @@ void push_implicit_layer(layer l) CHECK_CUDA(cudaPeekAtLastError()); } #endif - - diff --git a/src/Detector/darknet/src/rnn_vid.c b/src/Detector/darknet/src/rnn_vid.c index a5ff52784..c521c7588 100644 --- a/src/Detector/darknet/src/rnn_vid.c +++ b/src/Detector/darknet/src/rnn_vid.c @@ -160,7 +160,7 @@ void generate_vid_rnn(char *cfgfile, char *weightfile) //CvCapture* cap = cvCaptureFromFile("extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); float *feat; float *next; - next = NULL; + next = NULL; image last; for(i = 0; i < 25; ++i){ image im = get_image_from_stream_cpp(cap); diff --git a/src/Detector/darknet/src/route_layer.c b/src/Detector/darknet/src/route_layer.c index c92321018..23dfa0473 100644 --- a/src/Detector/darknet/src/route_layer.c +++ b/src/Detector/darknet/src/route_layer.c @@ -59,8 +59,7 @@ void resize_route_layer(route_layer *l, network *net) l->out_c += next.out_c; }else{ printf("Error: Different size of input layers: %d x %d, %d x %d\n", next.out_w, next.out_h, first.out_w, first.out_h); - l->out_h = l->out_w = l->out_c = 0; - exit(EXIT_FAILURE); + error("Error!", DARKNET_LOC); } } l->out_c = l->out_c / l->groups; diff --git a/src/Detector/darknet/src/softmax_layer.c b/src/Detector/darknet/src/softmax_layer.c index c0693f019..6535c5780 100644 --- a/src/Detector/darknet/src/softmax_layer.c +++ b/src/Detector/darknet/src/softmax_layer.c @@ -14,16 +14,16 @@ void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output) { - int b; - for (b = 0; b < batch; ++b) { - int i; - int count = 0; - for (i = 0; i < hierarchy->groups; ++i) { - int group_size = hierarchy->group_size[i]; - softmax(input + b*inputs + count, group_size, temp, output + b*inputs + count, 1); - count += group_size; - } - } + int b; + for (b = 0; b < batch; ++b) { + int i; + int count = 0; + for (i = 0; i < hierarchy->groups; ++i) { + int group_size = hierarchy->group_size[i]; + softmax(input + b*inputs + count, group_size, temp, output + b*inputs + count, 1); + count += group_size; + } + } } softmax_layer make_softmax_layer(int batch, int inputs, int groups) @@ -89,28 +89,28 @@ void pull_softmax_layer_output(const softmax_layer layer) void forward_softmax_layer_gpu(const softmax_layer l, network_state net) { if(l.softmax_tree){ - softmax_tree_gpu(net.input, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); - /* - int i; - int count = 0; - for (i = 0; i < l.softmax_tree->groups; ++i) { - int group_size = l.softmax_tree->group_size[i]; - softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); - count += group_size; - } - */ + softmax_tree_gpu(net.input, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); + /* + int i; + int count = 0; + for (i = 0; i < l.softmax_tree->groups; ++i) { + int group_size = l.softmax_tree->group_size[i]; + softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); + count += group_size; + } + */ } else { if(l.spatial){ - softmax_gpu_new_api(net.input, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); + softmax_gpu_new_api(net.input, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); }else{ - softmax_gpu_new_api(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); + softmax_gpu_new_api(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); } } if(net.truth && !l.noloss){ softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth, l.delta_gpu, l.loss_gpu); if(l.softmax_tree){ - mask_gpu_new_api(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth, 0); - mask_gpu_new_api(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth, 0); + mask_gpu_new_api(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth, 0); + mask_gpu_new_api(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth, 0); } cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); l.cost[0] = sum_array(l.loss, l.batch*l.inputs); @@ -119,7 +119,7 @@ void forward_softmax_layer_gpu(const softmax_layer l, network_state net) void backward_softmax_layer_gpu(const softmax_layer layer, network_state state) { - axpy_ongpu(layer.batch*layer.inputs, state.net.loss_scale, layer.delta_gpu, 1, state.delta, 1); + axpy_ongpu(layer.batch*layer.inputs, state.net.loss_scale, layer.delta_gpu, 1, state.delta, 1); } #endif @@ -151,12 +151,10 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int c, int cla l.truths = yolo_layer->truths; if (l.embedding_size != yolo_layer->embedding_size) { printf(" Error: [contrastive] embedding_size=%d isn't equal to [yolo] embedding_size=%d. They should use the same [convolutional] layer \n", l.embedding_size, yolo_layer->embedding_size); - getchar(); - exit(0); + error("Error!", DARKNET_LOC); } if (l.inputs % (l.n*l.h*l.w) != 0) { printf(" Warning: filters= number in the previous (embedding) layer isn't divisable by number of anchors %d \n", l.n); - getchar(); } } else { @@ -200,7 +198,7 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int c, int cla printf(" max_contr_size = %d MB \n", max_contr_size / (1024*1024)); l.contrast_p_gpu = (contrastive_params *)cuda_make_array(NULL, max_contr_size); #endif - fprintf(stderr, "contrastive %4d x%4d x%4d x emb_size %4d x batch: %4d classes = %4d, step = %4d \n", w, h, l.n, l.embedding_size, batch, l.classes, step); + fprintf(stderr, "contrastive %4d x%4d x%4d x emb_size %4d x batch: %4d classes = %4d, step = %4zu \n", w, h, l.n, l.embedding_size, batch, l.classes, step); if(l.detection) fprintf(stderr, "detection \n"); return l; } @@ -350,7 +348,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) } if (sim > 1.001 || sim < -1.001) { - printf(" sim = %f, ", sim); getchar(); + printf(" sim = %f, ", sim); } } } @@ -406,7 +404,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) const float P = P_constrastive(b, b2, l.labels, l.batch, z, l.embedding_size, l.temperature, l.cos_sim); l.p_constrastive[b*l.batch + b2] = P; if (P > 1 || P < -1) { - printf(" p = %f, ", P); getchar(); + printf(" p = %f, ", P); } } } @@ -421,7 +419,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) const int max_contr_size = (l.max_boxes*l.batch)*(l.max_boxes*l.batch); if (max_contr_size < contr_size) { printf(" Error: too large number of bboxes: contr_size = %d > max_contr_size = %d \n", contr_size, max_contr_size); - exit(0); + error("Error!", DARKNET_LOC); } int *labels = NULL; if (contr_size > 2) { @@ -480,11 +478,8 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) break; } - //if (q == contr_size) getchar(); - - //if (P > 1 || P < -1) { - // printf(" p = %f, z_index = %d, z_index2 = %d ", P, z_index, z_index2); getchar(); + // printf(" p = %f, z_index = %d, z_index2 = %d ", P, z_index, z_index2); //} } } diff --git a/src/Detector/darknet/src/tag.c b/src/Detector/darknet/src/tag.c index d7e1349a2..c1e031b2c 100644 --- a/src/Detector/darknet/src/tag.c +++ b/src/Detector/darknet/src/tag.c @@ -63,7 +63,7 @@ void train_tag(char *cfgfile, char *weightfile, int clear) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); if(*net.seen/N > epoch){ epoch = *net.seen/N; diff --git a/src/Detector/darknet/src/tree.h b/src/Detector/darknet/src/tree.h index 863797387..c5a09aa97 100644 --- a/src/Detector/darknet/src/tree.h +++ b/src/Detector/darknet/src/tree.h @@ -6,7 +6,7 @@ // int *leaf; // int n; // int *parent; -// int *child; +// int *child; // int *group; // char **name; // diff --git a/src/Detector/darknet/src/utils.c b/src/Detector/darknet/src/utils.c index 0c271b7bf..2ad33d5e5 100644 --- a/src/Detector/darknet/src/utils.c +++ b/src/Detector/darknet/src/utils.c @@ -18,6 +18,7 @@ #else #include #include +#include #endif @@ -239,12 +240,23 @@ void trim(char *str) free(buffer); } +char *strlaststr(char *haystack, char *needle) +{ + char *p = strstr(haystack, needle), *r = NULL; + while (p != NULL) + { + r = p; + p = strstr(p + 1, needle); + } + return r; +} + void find_replace_extension(char *str, char *orig, char *rep, char *output) { char* buffer = (char*)calloc(8192, sizeof(char)); sprintf(buffer, "%s", str); - char *p = strstr(buffer, orig); + char *p = strlaststr(buffer, orig); int offset = (p - buffer); int chars_from_end = strlen(buffer) - offset; if (!p || chars_from_end != strlen(orig)) { // Is 'orig' even in 'str' AND is 'orig' found at the end of 'str'? @@ -325,10 +337,30 @@ void top_k(float *a, int n, int k, int *index) } } + +void log_backtrace() +{ +#ifndef WIN32 + void * buffer[50]; + int count = backtrace(buffer, sizeof(buffer)); + char **symbols = backtrace_symbols(buffer, count); + + fprintf(stderr, "backtrace (%d entries)\n", count); + + for (int idx = 0; idx < count; idx ++) + { + fprintf(stderr, "%d/%d: %s\n", idx + 1, count, symbols[idx]); + } + + free(symbols); +#endif +} + void error(const char * const msg, const char * const filename, const char * const funcname, const int line) { - fprintf(stderr, "Darknet error location: %s, %s, line #%d\n", filename, funcname, line); + fprintf(stderr, "Darknet error location: %s, %s(), line #%d\n", filename, funcname, line); perror(msg); + log_backtrace(); exit(EXIT_FAILURE); } diff --git a/src/Detector/darknet/src/utils.h b/src/Detector/darknet/src/utils.h index f84054b8b..6a4ea8a35 100644 --- a/src/Detector/darknet/src/utils.h +++ b/src/Detector/darknet/src/utils.h @@ -8,7 +8,7 @@ #include #ifndef M_PI -#define M_PI 3.14159265358979323846 // pi +#define M_PI 3.14159265358979323846 #endif #ifdef __cplusplus diff --git a/src/Detector/darknet/src/writing.c b/src/Detector/darknet/src/writing.c index 29785b7b7..1fed538fa 100644 --- a/src/Detector/darknet/src/writing.c +++ b/src/Detector/darknet/src/writing.c @@ -65,7 +65,7 @@ void train_writing(char *cfgfile, char *weightfile) if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); if(get_current_batch(net)%100 == 0){ char buff[256]; diff --git a/src/Detector/darknet/src/yolo.c b/src/Detector/darknet/src/yolo.c index 92b5f1e6e..ef68acabc 100644 --- a/src/Detector/darknet/src/yolo.c +++ b/src/Detector/darknet/src/yolo.c @@ -198,7 +198,7 @@ void validate_yolo(char *cfgfile, char *weightfile) fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); if (fps) { - for (j = 0; j < classes; ++j) { + for(j = 0; j < classes; ++j){ fclose(fps[j]); } free(fps); @@ -251,7 +251,7 @@ void validate_yolo_recall(char *cfgfile, char *weightfile) if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms); char labelpath[4096]; - replace_image_to_label(path, labelpath); + replace_image_to_label(path, labelpath); int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); @@ -278,6 +278,7 @@ void validate_yolo_recall(char *cfgfile, char *weightfile) fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); free(id); + free(truth); free_image(orig); free_image(sized); } @@ -327,11 +328,11 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) free_image(im); free_image(sized); - + free_alphabet(alphabet); wait_until_press_key_cv(); destroy_all_windows_cv(); - if (filename) break; + if (filename) break; } free(boxes); for(j = 0; j < l.side*l.side*l.n; ++j) { @@ -342,16 +343,17 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) void run_yolo(int argc, char **argv) { - int dont_show = find_arg(argc, argv, "-dont_show"); - int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); + int dont_show = find_arg(argc, argv, "-dont_show"); + int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); int json_port = find_int_arg(argc, argv, "-json_port", -1); - char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); + char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); char *prefix = find_char_arg(argc, argv, "-prefix", 0); float thresh = find_float_arg(argc, argv, "-thresh", .2); - float hier_thresh = find_float_arg(argc, argv, "-hier", .5); + float hier_thresh = find_float_arg(argc, argv, "-hier", .5); int cam_index = find_int_arg(argc, argv, "-c", 0); int frame_skip = find_int_arg(argc, argv, "-s", 0); - int ext_output = find_arg(argc, argv, "-ext_output"); + int ext_output = find_arg(argc, argv, "-ext_output"); + char *json_file_output = find_char_arg(argc, argv, "-json_file_output", 0); if(argc < 4){ fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; @@ -365,5 +367,5 @@ void run_yolo(int argc, char **argv) else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, hier_thresh, cam_index, filename, voc_names, 20, 1, frame_skip, - prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0); + prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0, json_file_output); } diff --git a/src/Detector/darknet/src/yolo_console_dll.cpp b/src/Detector/darknet/src/yolo_console_dll.cpp index 27dca6661..a87574c51 100644 --- a/src/Detector/darknet/src/yolo_console_dll.cpp +++ b/src/Detector/darknet/src/yolo_console_dll.cpp @@ -354,9 +354,7 @@ int main(int argc, char *argv[]) if (filename == "zed_camera" || file_ext == "svo") { std::cout << "ZED 3D Camera " << zed.open(init_params) << std::endl; if (!zed.isOpened()) { - std::cout << " Error: ZED Camera should be connected to USB 3.0. And ZED_SDK should be installed. \n"; - getchar(); - return 0; + error("Error: ZED Camera should be connected to USB 3.0. And ZED_SDK should be installed", DARKNET_LOC); } cur_frame = zed_capture_rgb(zed); use_zed_camera = true; @@ -696,8 +694,12 @@ int main(int argc, char *argv[]) show_console_result(result_vec, obj_names); #endif // OPENCV } - catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); } - catch (...) { std::cerr << "unknown exception \n"; getchar(); } + catch (std::exception &e) { + std::cerr << "exception: " << e.what() << "\n"; + } + catch (...) { + std::cerr << "unknown exception \n"; + } filename.clear(); } diff --git a/src/Detector/darknet/src/yolo_layer.c b/src/Detector/darknet/src/yolo_layer.c index 0eae2fc8f..de9d09904 100644 --- a/src/Detector/darknet/src/yolo_layer.c +++ b/src/Detector/darknet/src/yolo_layer.c @@ -11,8 +11,6 @@ #include #include -extern int check_mistakes; - layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes) { int i; @@ -31,6 +29,7 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int l.classes = classes; l.cost = (float*)xcalloc(1, sizeof(float)); l.biases = (float*)xcalloc(total * 2, sizeof(float)); + l.nbiases = total * 2; if(mask) l.mask = mask; else{ l.mask = (int*)xcalloc(n, sizeof(int)); @@ -433,7 +432,6 @@ void *process_batch(void* ptr) if (class_id >= l.classes || class_id < 0) { printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); printf("\n truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id); - if (check_mistakes) getchar(); continue; // if label contains class_id more than number of classes in the cfg-file and class_id check garbage value } diff --git a/src/Detector/darknet/src/yolo_v2_class.cpp b/src/Detector/darknet/src/yolo_v2_class.cpp index df3f7acf7..6c0681909 100644 --- a/src/Detector/darknet/src/yolo_v2_class.cpp +++ b/src/Detector/darknet/src/yolo_v2_class.cpp @@ -13,7 +13,7 @@ extern "C" { #include "image.h" #include "demo.h" #include "option_list.h" -#include "stb_image.h" +#include } //#include @@ -69,7 +69,7 @@ int get_device_count() { return count; #else return -1; -#endif // GPU +#endif // GPU } bool built_with_cuda(){ @@ -106,7 +106,7 @@ int get_device_name(int gpu, char* deviceName) { return 1; #else return -1; -#endif // GPU +#endif // GPU } #ifdef GPU diff --git a/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake index 37388d30b..7a692b055 100644 --- a/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake +++ b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake @@ -1,180 +1,104 @@ -# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. +# Distributed under the OSI-approved BSD 3-Clause License. +# Copyright Stefano Sinigardi + #.rst: # FindCUDNN -# ------- -# -# Find CUDNN library -# -# Valiables that affect result: -# , , : as usual -# -# : as usual, plus we do find '5.1' version if you wanted '5' -# (not if you wanted '5.0', as usual) +# -------- # -# Result variables +# Result Variables # ^^^^^^^^^^^^^^^^ # -# This module will set the following variables in your project: +# This module will set the following variables in your project:: # -# ``CUDNN_INCLUDE`` -# where to find cudnn.h. -# ``CUDNN_LIBRARY`` -# the libraries to link against to use CUDNN. -# ``CUDNN_FOUND`` -# If false, do not try to use CUDNN. -# ``CUDNN_VERSION`` -# Version of the CUDNN library we looked for +# ``CUDNN_FOUND`` +# True if CUDNN found on the local system # -# Exported functions -# ^^^^^^^^^^^^^^^^ -# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir]) -# This function will try to download and install CUDNN. -# CUDNN5 and CUDNN6 are supported. +# ``CUDNN_INCLUDE_DIRS`` +# Location of CUDNN header files. # +# ``CUDNN_LIBRARIES`` +# The CUDNN libraries. +# +# ``CuDNN::CuDNN`` +# The CUDNN target # -function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir) - message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}") - string(REGEX REPLACE "-rc$" "" version_base "${version}") - set(tar_libdir cuda/lib64) - set(tar_incdir cuda/include) - - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(url_extension tgz) - if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(url_arch_name linux-x64 ) - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc") - set(url_arch_name linux-ppc64le ) - # TX1 has to be installed via JetPack - endif() - elseif (APPLE) - set(url_extension tgz) - set(tar_libdir cuda/lib) - set(url_arch_name osx-x64) - elseif(WIN32) - set(url_extension zip) - set(tar_bindir cuda/bin) - set(tar_libdir cuda/lib/x64) - if(CMAKE_SYSTEM_VERSION MATCHES "10") - set(url_arch_name windows10-x64) - else() - set(url_arch_name windows7-x64) - endif() - endif() - - # Download and install CUDNN locally if not found on the system - if(url_arch_name) - set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version}) - file(MAKE_DIRECTORY ${download_dir}) - set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension}) - set(base_url http://developer.download.nvidia.com/compute/redist/cudnn) - set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename}) - set(cudnn_file ${download_dir}/${cudnn_filename}) - - if(NOT EXISTS ${cudnn_file}) - message(STATUS "Downloading CUDNN library from NVIDIA...") - file(DOWNLOAD ${cudnn_url} ${cudnn_file} - SHOW_PROGRESS STATUS cudnn_status - ) - execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status) - - if(NOT "${cudnn_status}" MATCHES "0") - message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN") - endif() - endif() - - if(dest_bindir AND tar_bindir) - file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir}) - endif() - - if(dest_incdir) - file(COPY ${download_dir}/${tar_incdir}/ DESTINATION ${dest_incdir}) - endif() - - file(COPY ${download_dir}/${tar_libdir}/ DESTINATION ${dest_libdir} ) - - get_filename_component(dest_dir ${dest_libdir} DIRECTORY) - - set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE) - unset(CUDNN_LIBRARY CACHE) - unset(CUDNN_INCLUDE_DIR CACHE) - - endif(url_arch_name) -endfunction() - -##################################################### - -find_package(PkgConfig) -pkg_check_modules(PC_CUDNN QUIET CUDNN) - -get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH) - -# We use major only in library search as major/minor is not entirely consistent among platforms. -# Also, looking for exact minor version of .so is in general not a good idea. -# More strict enforcement of minor/patch version is done if/when the header file is examined. -if(CUDNN_FIND_VERSION_EXACT) - SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}") - SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR}) -else() - SET(__cudnn_lib_win_name cudnn64) +include(FindPackageHandleStandardArgs) + +find_path(CUDNN_INCLUDE_DIR NAMES cudnn.h cudnn_v8.h cudnn_v7.h + HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/include + PATH_SUFFIXES cuda/include include) +find_library(CUDNN_LIBRARY NAMES cudnn cudnn8 cudnn7 + HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/lib/x86_64-linux-gnu/ + PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64 cuda/lib/x64) +if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v8.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v8.h CUDNN_HEADER_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v7.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v7.h CUDNN_HEADER_CONTENTS) endif() - -find_library(CUDNN_LIBRARY - NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} - PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} - PATH_SUFFIXES lib lib64 bin - DOC "CUDNN library." ) - -if(CUDNN_LIBRARY) - SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR}) - set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}) - get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH) - find_path(CUDNN_INCLUDE_DIR - NAMES cudnn.h - HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root} - PATH_SUFFIXES include - DOC "Path to CUDNN include directory." ) +if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) endif() - -if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR) - file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) +if(CUDNN_HEADER_CONTENTS) string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" - CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + _CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" - CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") + _CUDNN_VERSION_MAJOR "${_CUDNN_VERSION_MAJOR}") string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" - CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + _CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" - CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") + _CUDNN_VERSION_MINOR "${_CUDNN_VERSION_MINOR}") string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" - CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + _CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}") string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" - CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") - set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}) -endif() - -if(CUDNN_MAJOR_VERSION) - ## Fixing the case where 5.1 does not fit 'exact' 5. - if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR) - if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}") - set(CUDNN_VERSION ${CUDNN_FIND_VERSION}) - endif() + _CUDNN_VERSION_PATCH "${_CUDNN_VERSION_PATCH}") + if(NOT _CUDNN_VERSION_MAJOR) + set(CUDNN_VERSION "?") + else() + set(CUDNN_VERSION "${_CUDNN_VERSION_MAJOR}.${_CUDNN_VERSION_MINOR}.${_CUDNN_VERSION_PATCH}") endif() -else() - # Try to set CUDNN version from config file - set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER}) endif() -find_package_handle_standard_args( - CUDNN - FOUND_VAR CUDNN_FOUND - REQUIRED_VARS CUDNN_LIBRARY - VERSION_VAR CUDNN_VERSION - ) +set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) +set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) +mark_as_advanced(CUDNN_LIBRARY CUDNN_INCLUDE_DIR) -if(CUDNN_FOUND) - set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) - set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) - set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER}) -endif() +find_package_handle_standard_args(CUDNN + REQUIRED_VARS CUDNN_INCLUDE_DIR CUDNN_LIBRARY + VERSION_VAR CUDNN_VERSION +) + +if(WIN32) + set(CUDNN_DLL_DIR ${CUDNN_INCLUDE_DIR}) + list(TRANSFORM CUDNN_DLL_DIR APPEND "/../bin") + find_file(CUDNN_LIBRARY_DLL NAMES cudnn64_${CUDNN_VERSION_MAJOR}.dll PATHS ${CUDNN_DLL_DIR}) +endif() + +if( CUDNN_FOUND AND NOT TARGET CuDNN::CuDNN ) + if( EXISTS "${CUDNN_LIBRARY_DLL}" ) + add_library( CuDNN::CuDNN SHARED IMPORTED ) + set_target_properties( CuDNN::CuDNN PROPERTIES + IMPORTED_LOCATION "${CUDNN_LIBRARY_DLL}" + IMPORTED_IMPLIB "${CUDNN_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" ) + else() + add_library( CuDNN::CuDNN UNKNOWN IMPORTED ) + set_target_properties( CuDNN::CuDNN PROPERTIES + IMPORTED_LOCATION "${CUDNN_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" ) + endif() +endif() diff --git a/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake_ b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake_ new file mode 100644 index 000000000..37388d30b --- /dev/null +++ b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake_ @@ -0,0 +1,180 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. +#.rst: +# FindCUDNN +# ------- +# +# Find CUDNN library +# +# Valiables that affect result: +# , , : as usual +# +# : as usual, plus we do find '5.1' version if you wanted '5' +# (not if you wanted '5.0', as usual) +# +# Result variables +# ^^^^^^^^^^^^^^^^ +# +# This module will set the following variables in your project: +# +# ``CUDNN_INCLUDE`` +# where to find cudnn.h. +# ``CUDNN_LIBRARY`` +# the libraries to link against to use CUDNN. +# ``CUDNN_FOUND`` +# If false, do not try to use CUDNN. +# ``CUDNN_VERSION`` +# Version of the CUDNN library we looked for +# +# Exported functions +# ^^^^^^^^^^^^^^^^ +# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir]) +# This function will try to download and install CUDNN. +# CUDNN5 and CUDNN6 are supported. +# +# + +function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir) + message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}") + string(REGEX REPLACE "-rc$" "" version_base "${version}") + set(tar_libdir cuda/lib64) + set(tar_incdir cuda/include) + + if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") + set(url_extension tgz) + if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") + set(url_arch_name linux-x64 ) + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc") + set(url_arch_name linux-ppc64le ) + # TX1 has to be installed via JetPack + endif() + elseif (APPLE) + set(url_extension tgz) + set(tar_libdir cuda/lib) + set(url_arch_name osx-x64) + elseif(WIN32) + set(url_extension zip) + set(tar_bindir cuda/bin) + set(tar_libdir cuda/lib/x64) + if(CMAKE_SYSTEM_VERSION MATCHES "10") + set(url_arch_name windows10-x64) + else() + set(url_arch_name windows7-x64) + endif() + endif() + + # Download and install CUDNN locally if not found on the system + if(url_arch_name) + set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version}) + file(MAKE_DIRECTORY ${download_dir}) + set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension}) + set(base_url http://developer.download.nvidia.com/compute/redist/cudnn) + set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename}) + set(cudnn_file ${download_dir}/${cudnn_filename}) + + if(NOT EXISTS ${cudnn_file}) + message(STATUS "Downloading CUDNN library from NVIDIA...") + file(DOWNLOAD ${cudnn_url} ${cudnn_file} + SHOW_PROGRESS STATUS cudnn_status + ) + execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status) + + if(NOT "${cudnn_status}" MATCHES "0") + message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN") + endif() + endif() + + if(dest_bindir AND tar_bindir) + file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir}) + endif() + + if(dest_incdir) + file(COPY ${download_dir}/${tar_incdir}/ DESTINATION ${dest_incdir}) + endif() + + file(COPY ${download_dir}/${tar_libdir}/ DESTINATION ${dest_libdir} ) + + get_filename_component(dest_dir ${dest_libdir} DIRECTORY) + + set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE) + unset(CUDNN_LIBRARY CACHE) + unset(CUDNN_INCLUDE_DIR CACHE) + + endif(url_arch_name) +endfunction() + +##################################################### + +find_package(PkgConfig) +pkg_check_modules(PC_CUDNN QUIET CUDNN) + +get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH) + +# We use major only in library search as major/minor is not entirely consistent among platforms. +# Also, looking for exact minor version of .so is in general not a good idea. +# More strict enforcement of minor/patch version is done if/when the header file is examined. +if(CUDNN_FIND_VERSION_EXACT) + SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}") + SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR}) +else() + SET(__cudnn_lib_win_name cudnn64) +endif() + +find_library(CUDNN_LIBRARY + NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} + PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} + PATH_SUFFIXES lib lib64 bin + DOC "CUDNN library." ) + +if(CUDNN_LIBRARY) + SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR}) + set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}) + get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH) + find_path(CUDNN_INCLUDE_DIR + NAMES cudnn.h + HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root} + PATH_SUFFIXES include + DOC "Path to CUDNN include directory." ) +endif() + +if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR) + file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" + CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") + string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" + CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" + CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" + CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" + CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") + set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}) +endif() + +if(CUDNN_MAJOR_VERSION) + ## Fixing the case where 5.1 does not fit 'exact' 5. + if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR) + if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}") + set(CUDNN_VERSION ${CUDNN_FIND_VERSION}) + endif() + endif() +else() + # Try to set CUDNN version from config file + set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER}) +endif() + +find_package_handle_standard_args( + CUDNN + FOUND_VAR CUDNN_FOUND + REQUIRED_VARS CUDNN_LIBRARY + VERSION_VAR CUDNN_VERSION + ) + +if(CUDNN_FOUND) + set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) + set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) + set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER}) +endif()