From ba8f7bf756241be5b766d198a69184fb8095213e Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Sun, 18 Apr 2021 15:11:36 +0200 Subject: [PATCH 01/46] improve build process even more (#7610) * test for shared libs * improve flag handling * build so lib in ci * clone vcpkg if not found * fix * improve clang handling * bump vcpkg.json version * gemm.c compatible with ARM * remove unnecessary vcpkg.json field * do not unnecessarily rebuild vcpkg in the CI build stage * use alexeyab nuget cache * enable self-removal of build folder * add interactivity in build.ps1 if not opt-out * spellcheck README * fix another leftover spelling error * also auto-update darknet if possible * do not self-update darknet in CI --- .github/workflows/ccpp.yml | 34 +++++++------- CMakeLists.txt | 38 +++++++++------- README.md | 55 ++++++++++------------ build.ps1 | 92 +++++++++++++++++++++++++++++++++---- src/gemm.c | 93 ++++++++++++++------------------------ src/http_stream.cpp | 5 +- src/httplib.h | 5 +- vcpkg.json | 22 +++++++-- 8 files changed, 204 insertions(+), 140 deletions(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 077b3cf99f8..d6c18fda3fd 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -111,10 +111,10 @@ jobs: ./vcpkg/bootstrap-vcpkg.sh; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -source "https://nuget.pkg.github.com/cenit/index.json" + -source "https://nuget.pkg.github.com/AlexeyAB/index.json" -storepasswordincleartext -name "vcpkgbinarycache" - -username "cenit" + -username "AlexeyAB" -password "${{ secrets.GITHUB_TOKEN }}" - name: 'Build' @@ -124,7 +124,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -ForceStaticLib + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -163,7 +163,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -EnableOPENCV + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -222,7 +222,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -EnableOPENCV -EnableCUDA + run: ./build.ps1 -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -251,7 +251,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET osx-vcpkg: @@ -271,15 +271,15 @@ jobs: ./vcpkg/bootstrap-vcpkg.sh; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -source "https://nuget.pkg.github.com/cenit/index.json" + -source "https://nuget.pkg.github.com/AlexeyAB/index.json" -storepasswordincleartext -name "vcpkgbinarycache" - -username "cenit" + -username "AlexeyAB" -password "${{ secrets.GITHUB_TOKEN }}" - name: 'Build' shell: pwsh - run: ./build.ps1 -UseVCPKG + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -311,7 +311,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -EnableOPENCV + run: ./build.ps1 
-EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -340,7 +340,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET win-vcpkg: @@ -357,15 +357,15 @@ jobs: ./vcpkg/bootstrap-vcpkg.sh; $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -source "https://nuget.pkg.github.com/cenit/index.json" + -source "https://nuget.pkg.github.com/AlexeyAB/index.json" -storepasswordincleartext -name "vcpkgbinarycache" - -username "cenit" + -username "AlexeyAB" -password "${{ secrets.GITHUB_TOKEN }}" - name: 'Build' shell: pwsh - run: ./build.ps1 -UseVCPKG -EnableOPENCV + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -398,7 +398,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 + run: ./build.ps1 -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -431,7 +431,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET win-intlibs-cuda: @@ -454,7 +454,7 @@ jobs: CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" shell: pwsh - run: ./build.ps1 -EnableCUDA + run: ./build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET mingw: diff --git a/CMakeLists.txt b/CMakeLists.txt index 00f446fcccf..0029abe78ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,8 +19,9 @@ option(ENABLE_CUDNN "Enable CUDNN" ON) option(ENABLE_CUDNN_HALF "Enable CUDNN Half precision" ON) option(ENABLE_ZED_CAMERA "Enable ZED Camera support" ON) option(ENABLE_VCPKG_INTEGRATION "Enable VCPKG integration" ON) +option(VCPKG_BUILD_OPENCV_WITH_CUDA "Build OpenCV with CUDA extension integration" ON) -if(ENABLE_OPENCV_WITH_CUDA AND NOT APPLE) +if(VCPKG_BUILD_OPENCV_WITH_CUDA AND NOT APPLE) list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda") endif() if(ENABLE_CUDA AND NOT APPLE) @@ -33,18 +34,6 @@ if(ENABLE_CUDNN AND ENABLE_CUDA AND NOT APPLE) list(APPEND VCPKG_MANIFEST_FEATURES "cudnn") endif() -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG TRUE) - if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - set(CMAKE_COMPILER_IS_CLANG TRUE) - else() - set(CMAKE_COMPILER_IS_CLANG FALSE) - endif() -else() - set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG FALSE) - set(CMAKE_COMPILER_IS_CLANG FALSE) -endif() - if(NOT CMAKE_HOST_SYSTEM_PROCESSOR AND NOT WIN32) execute_process(COMMAND "uname" "-m" OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR OUTPUT_STRIP_TRAILING_WHITESPACE) endif() @@ -87,6 +76,18 @@ enable_language(CXX) set(CMAKE_CXX_STANDARD 11) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH}) +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" MATCHES "Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG TRUE) + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "clang") + set(CMAKE_COMPILER_IS_CLANG TRUE) + else() + set(CMAKE_COMPILER_IS_CLANG FALSE) + endif() +else() + set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG FALSE) + set(CMAKE_COMPILER_IS_CLANG FALSE) +endif() + set(default_build_type "Release") if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to '${default_build_type}' as none was 
specified.") @@ -201,12 +202,14 @@ endif() set(ADDITIONAL_CXX_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-deprecated-declarations -Wno-write-strings") set(ADDITIONAL_C_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-deprecated-declarations -Wno-write-strings") +if(UNIX AND BUILD_SHARED_LIBS AND NOT CMAKE_COMPILER_IS_CLANG) + set(SHAREDLIB_CXX_FLAGS "-Wl,-Bsymbolic") + set(SHAREDLIB_C_FLAGS "-Wl,-Bsymbolic") +endif() if(MSVC) set(ADDITIONAL_CXX_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast") set(ADDITIONAL_C_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast") - set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${CMAKE_C_FLAGS}") string(REGEX REPLACE "/O2" "/Ox" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) string(REGEX REPLACE "/O2" "/Ox" CMAKE_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE}) endif() @@ -218,8 +221,6 @@ if(CMAKE_COMPILER_IS_GNUCC_OR_CLANG) set(CMAKE_C_FLAGS "-pthread ${CMAKE_C_FLAGS}") endif() endif() - set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${CMAKE_C_FLAGS}") string(REGEX REPLACE "-O0" "-Og" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) string(REGEX REPLACE "-O3" "-Ofast" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) string(REGEX REPLACE "-O0" "-Og" CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG}) @@ -230,6 +231,9 @@ if(CMAKE_COMPILER_IS_GNUCC_OR_CLANG) endif() endif() +set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${SHAREDLIB_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") +set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${SHAREDLIB_C_FLAGS} ${CMAKE_C_FLAGS}") + if(OpenCV_FOUND) if(ENABLE_CUDA AND NOT OpenCV_CUDA_VERSION) set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE) diff --git a/README.md b/README.md index 2de8c7bf980..1e839f8a44a 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ About Darknet framework: http://pjreddie.com/darknet/ * [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) -* [Requirements (and how to install dependecies)](#requirements) +* [Requirements (and how to install dependencies)](#requirements) * [Pre-trained models](#pre-trained-models) * [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) * [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) @@ -107,7 +107,7 @@ Others: https://www.youtube.com/user/pjreddie/videos #### How to evaluate AP of YOLOv4 on the MS COCO evaluation server 1. Download and unzip test-dev2017 dataset from MS COCO server: http://images.cocodataset.org/zips/test2017.zip -2. Download list of images for Detection taks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt +2. Download list of images for Detection tasks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt 3. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) 4. 
Content of the file `cfg/coco.data` should be @@ -202,13 +202,13 @@ You can get cfg-files by path: `darknet/cfg/` * **Powershell** (already installed on windows): https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell * **CUDA >= 10.2**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) * **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) -* **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) +* **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) * **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported ### Yolo v4 in other frameworks * **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4 -* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Andriod: https://github.com/hunglc007/tensorflow-yolov4-tflite +* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Android: https://github.com/hunglc007/tensorflow-yolov4-tflite Official TF models: https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/yolo For YOLOv4 - convert `yolov4.weights`/`cfg` files to `yolov4.pb` by using [TNTWEN](https://github.com/TNTWEN/OpenVINO-YOLOV4) project, and to `yolov4.tflite` [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) * **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) @@ -226,7 +226,7 @@ You can get cfg-files by path: `darknet/cfg/` * **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt * **Xilinx Zynq Ultrascale+ Deep Learning Processor (DPU) ZCU102/ZCU104:** https://github.com/Xilinx/Vitis-In-Depth-Tutorial/tree/master/Machine_Learning/Design_Tutorials/07-yolov4-tutorial * **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, 
using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) -* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about +* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backend (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about * **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite * **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron @@ -287,12 +287,12 @@ On Linux find executable file `./darknet` in the root directory, while on Window * Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` * 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data cfg/yolo9000.cfg yolo9000.weights` -* Remeber to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app +* Remember to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app * To process a list of images `data/train.txt` and save results of detection to `result.json` file use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt` * To process a list of images `data/train.txt` and save results of detection to `result.txt` use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt` -* Pseudo-lableing - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: +* Pseudo-labelling - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt` * To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` * To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` @@ -324,11 +324,6 @@ To update CMake on Ubuntu, it's better to follow guide here: https://apt.kitware Open a shell and type these commands ```PowerShell -PS Code/> git clone https://github.com/microsoft/vcpkg -PS Code/> cd vcpkg -PS Code/vcpkg> ./bootstrap-vcpkg.sh -PS Code/vcpkg> $env:VCPKG_ROOT=$PWD -PS Code/vcpkg> cd .. 
PS Code/> git clone https://github.com/AlexeyAB/darknet PS Code/> cd darknet PS Code/darknet> ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN @@ -359,9 +354,9 @@ Before make, you can set such options in the `Makefile`: [link](https://github.c * `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`) * `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x * `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams -* `DEBUG=1` to bould debug version of Yolo +* `DEBUG=1` to build debug version of Yolo * `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU -* `LIBSO=1` to build a library `darknet.so` and binary runable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp +* `LIBSO=1` to build a library `darknet.so` and binary runnable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4` * `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera` @@ -371,16 +366,17 @@ To run Darknet on Linux use examples from this article, just use `./darknet` ins ### How to compile on Windows (using `CMake`) -Requires: -* MSVS: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community +Requires: + +* MSVC: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community * CMake GUI: `Windows win64-x64 Installer`https://cmake.org/download/ * Download Darknet zip-archive with the latest commit and uncompress it: [master.zip](https://github.com/AlexeyAB/darknet/archive/master.zip) -In Windows: +In Windows: -* Start (button) -> All programms -> CMake -> CMake (gui) -> +* Start (button) -> All programs -> CMake -> CMake (gui) -> -* [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64` -> Finish -> Generate -> Open Project -> +* [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64` -> Finish -> Generate -> Open Project -> * in MS Visual Studio: Select: x64 and Release -> Build -> Build solution @@ -400,11 +396,6 @@ This is the recommended approach to build Darknet on Windows. 3. Open Powershell (Start -> All programs -> Windows Powershell) and type these commands: ```PowerShell -PS Code/> git clone https://github.com/microsoft/vcpkg -PS Code/> cd vcpkg -PS Code/vcpkg> .\bootstrap-vcpkg.bat -PS Code/vcpkg> $env:VCPKG_ROOT=$PWD -PS Code/vcpkg> cd .. 
PS Code/> git clone https://github.com/AlexeyAB/darknet PS Code/> cd darknet PS Code/darknet> .\build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN @@ -490,7 +481,7 @@ It will create `.txt`-file for each `.jpg`-image-file - in the same directory an * `` - integer object number from `0` to `(classes-1)` * ` ` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` * for example: ` = / ` or ` = / ` - * atention: ` ` - are center of rectangle (are not top-left corner) + * attention: ` ` - are center of rectangle (are not top-left corner) For example for `img1.jpg` you will be created `img1.txt` containing: @@ -570,15 +561,15 @@ Usually sufficient 2000 iterations for each class(object), but not less than num * **9002** - iteration number (number of batch) * **0.60730 avg** - average loss (error) - **the lower, the better** - When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final avgerage loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). + When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final average loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. 2. Once training is stopped, you should take some of last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them: -For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to overfitting. **Overfitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. You should get weights from **Early Stopping Point**: +For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to over-fitting. **Over-fitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. You should get weights from **Early Stopping Point**: -![Overfitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) +![Over-fitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) To get weights from Early Stopping Point: @@ -592,7 +583,7 @@ To get weights from Early Stopping Point: * `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights` * `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights` -And comapre last output lines for each weights (7000, 8000, 9000): +And compare last output lines for each weights (7000, 8000, 9000): Choose weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union) @@ -610,7 +601,7 @@ So you will see mAP-chart (red-line) in the Loss-chart Window. 
mAP will be calcu Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` -* **IoU** (intersect over union) - average instersect over union of objects and detections for a certain threshold = 0.24 +* **IoU** (intersect over union) - average intersect over union of objects and detections for a certain threshold = 0.24 * **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf @@ -639,7 +630,7 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol * my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong. -* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more +* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at different: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more * desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects @@ -670,7 +661,7 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol * to speedup training (with decreasing detection accuracy) set param `stopbackward=1` for layer-136 in cfg-file -* each: `model of object, side, illimination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. +* each: `model of object, side, illumination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. * to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5. 
diff --git a/build.ps1 b/build.ps1 index 7a38d0d250d..e147fb878ef 100755 --- a/build.ps1 +++ b/build.ps1 @@ -1,11 +1,15 @@ #!/usr/bin/env pwsh param ( + [switch]$DisableInteractive = $false, [switch]$EnableCUDA = $false, [switch]$EnableCUDNN = $false, [switch]$EnableOPENCV = $false, [switch]$EnableOPENCV_CUDA = $false, [switch]$UseVCPKG = $false, + [switch]$DoNotUpdateVCPKG = $false, + [switch]$DoNotUpdateDARKNET = $false, + [switch]$DoNotDeleteBuildFolder = $false, [switch]$DoNotSetupVS = $false, [switch]$DoNotUseNinja = $false, [switch]$ForceCPP = $false, @@ -13,9 +17,45 @@ param ( [switch]$ForceGCC8 = $false ) +if (-Not $DisableInteractive -and -Not $UseVCPKG) { + $Result = Read-Host "Enable vcpkg to install darknet dependencies (yes/no)" + if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + $UseVCPKG = $true + } +} + +if (-Not $DisableInteractive -and -Not $EnableCUDA -and -Not $IsMacOS) { + $Result = Read-Host "Enable CUDA integration (yes/no)" + if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + $EnableCUDA = $true + } +} + +if ($EnableCUDA -and -Not $DisableInteractive -and -Not $EnableCUDNN) { + $Result = Read-Host "Enable CUDNN optional dependency (yes/no)" + if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + $EnableCUDNN = $true + } +} + +if (-Not $DisableInteractive -and -Not $EnableOPENCV) { + $Result = Read-Host "Enable OpenCV optional dependency (yes/no)" + if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + $EnableOPENCV = $true + } +} + $number_of_build_workers = 8 #$additional_build_setup = " -DCMAKE_CUDA_ARCHITECTURES=30" +if ($IsLinux -or $IsMacOS) { + $bootstrap_ext = ".sh" +} +elseif ($IsWindows) { + $bootstrap_ext = ".bat" +} +Write-Host "Native shell script extension: ${bootstrap_ext}" + if (-Not $IsWindows) { $DoNotSetupVS = $true } @@ -36,7 +76,7 @@ if ($IsWindows -and -Not $env:VCPKG_DEFAULT_TRIPLET) { } if ($EnableCUDA) { - if($IsMacOS) { + if ($IsMacOS) { Write-Host "Cannot enable CUDA on macOS" -ForegroundColor Yellow $EnableCUDA = $false } @@ -82,6 +122,12 @@ elseif ($EnableOPENCV_CUDA -and -not $EnableCUDA -and -not $EnableOPENCV) { if ($UseVCPKG) { Write-Host "VCPKG is enabled" + if ($DoNotUpdateVCPKG) { + Write-Host "VCPKG will not be updated to latest version if found" -ForegroundColor Yellow + } + else { + Write-Host "VCPKG will be updated to latest version if found" + } } else { Write-Host "VCPKG is disabled, please pass -UseVCPKG to the script to enable" @@ -110,6 +156,18 @@ else { Push-Location $PSScriptRoot +$GIT_EXE = Get-Command git 2> $null | Select-Object -ExpandProperty Definition +if (-Not $GIT_EXE) { + throw "Could not find git, please install it" +} +else { + Write-Host "Using git from ${GIT_EXE}" +} + +if ((Test-Path "$PSScriptRoot/.git") -and -not $DoNotUpdateDARKNET) { + & $GIT_EXE pull +} + $CMAKE_EXE = Get-Command cmake 2> $null | Select-Object -ExpandProperty Definition if (-Not $CMAKE_EXE) { throw "Could not find CMake, please install it" @@ -216,7 +274,10 @@ elseif ((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${RUNVCPKG_VCPKG_ROOT_OUT}" $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } -elseif ((Test-Path "$PWD/vcpkg") -and $UseVCPKG) { +elseif ($UseVCPKG) { + if (-Not (Test-Path "$PWD/vcpkg")) { + & $GIT_EXE clone https://github.com/microsoft/vcpkg + } $vcpkg_path = "$PWD/vcpkg" 
$env:VCPKG_ROOT = "$PWD/vcpkg" Write-Host "Found vcpkg in $PWD/vcpkg: $PWD/vcpkg" @@ -227,6 +288,13 @@ else { $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=OFF" } +if ($UseVCPKG -and (Test-Path "$vcpkg_path/.git") -and -not $DoNotUpdateVCPKG) { + Push-Location $vcpkg_path + & $GIT_EXE pull + & $PWD/bootstrap-vcpkg${bootstrap_ext} -disableMetrics + Pop-Location +} + if (-Not $DoNotSetupVS) { if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) { $vsfound = getLatestVisualStudioWithDesktopWorkloadPath @@ -239,7 +307,7 @@ if (-Not $DoNotSetupVS) { } } Pop-Location - Write-Host "Visual Studio Command Prompt variables set" -ForegroundColor Yellow + Write-Host "Visual Studio Command Prompt variables set" } $tokens = getLatestVisualStudioWithDesktopWorkloadVersion @@ -270,13 +338,13 @@ if (-Not $DoNotSetupVS) { if ($DoNotSetupVS -and $DoNotUseNinja) { $generator = "Unix Makefiles" } -Write-Host "Setting up environment to use CMake generator: $generator" -ForegroundColor Yellow +Write-Host "Setting up environment to use CMake generator: $generator" if (-Not $IsMacOS -and $EnableCUDA) { if ($null -eq (Get-Command "nvcc" -ErrorAction SilentlyContinue)) { if (Test-Path env:CUDA_PATH) { $env:PATH += ";${env:CUDA_PATH}/bin" - Write-Host "Found cuda in ${env:CUDA_PATH}" -ForegroundColor Yellow + Write-Host "Found cuda in ${env:CUDA_PATH}" } else { Write-Host "Unable to find CUDA, if necessary please install it or define a CUDA_PATH env variable pointing to the install folder" -ForegroundColor Yellow @@ -311,12 +379,18 @@ if (-Not($EnableOPENCV)) { $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV:BOOL=OFF" } -if ($EnableOPENCV_CUDA) { - $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV_WITH_CUDA:BOOL=ON" +if (-Not($EnableOPENCV_CUDA)) { + $additional_build_setup = $additional_build_setup + " -DVCPKG_BUILD_OPENCV_WITH_CUDA:BOOL=OFF" +} + +$build_folder = "./build_release" +if (-Not $DoNotDeleteBuildFolder) { + Write-Host "Removing folder $build_folder" -ForegroundColor Yellow + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $build_folder } -New-Item -Path ./build_release -ItemType directory -Force -Set-Location build_release +New-Item -Path $build_folder -ItemType directory -Force +Set-Location $build_folder $cmake_args = "-G `"$generator`" ${additional_build_setup} -S .." 
Write-Host "CMake args: $cmake_args" Start-Process -NoNewWindow -Wait -FilePath $CMAKE_EXE -ArgumentList $cmake_args diff --git a/src/gemm.c b/src/gemm.c index 519751c0622..84a7e9a6815 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -8,13 +8,29 @@ #include #include #include -#ifdef _WIN32 -#include -#endif #if defined(_OPENMP) #include #endif +#if defined(_MSC_VER) +#if defined(_M_ARM) || defined(_M_ARM64) +static inline uint32_t popcnt(uint32_t v) { + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; +} +#define POPCNT(x) popcnt((x)) +#define POPCNT64(x) (popcnt((unsigned)(x)) + popcnt((unsigned)((uint64_t)(x) >> 32))) +#else +#include +#define POPCNT(x) __popcnt(x) +#define POPCNT64(x) __popcnt64(x) +#endif +#elif defined(__GNUC__) +#define POPCNT(x) __builtin_popcount(x) +#define POPCNT64(x) __builtin_popcountll(x) +#endif + #define TILE_M 4 // 4 ops #define TILE_N 16 // AVX2 = 2 ops * 8 floats #define TILE_K 16 // loop @@ -230,7 +246,7 @@ void gemm_nn_custom_bin_mean(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (k_ldb + j) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); //printf("\n %d \n",__builtin_popcountll(c_bit64)); // gcc - printf("\n %d \n", __popcnt64(c_bit64)); // msvs + printf("\n %d \n", POPCNT64(c_bit64)); // msvs int h; for (h = 0; h < 64; ++h) @@ -298,11 +314,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); -#ifdef WIN32 - int tmp_count = __popcnt64(c_bit64); -#else - int tmp_count = __builtin_popcountll(c_bit64); -#endif + int tmp_count = POPCNT64(c_bit64); if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits count += tmp_count; @@ -503,16 +515,6 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, } } -static inline int popcnt_32(uint32_t val32) { -#ifdef WIN32 // Windows MSVS - int tmp_count = __popcnt(val32); -#else // Linux GCC - int tmp_count = __builtin_popcount(val32); -#endif - return tmp_count; -} -//---------------------------- - #if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && !defined(__MINGW32__)) #if (defined(_WIN64) && !defined(__MINGW64__)) @@ -925,14 +927,14 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, // waiting for - CPUID Flags: AVX512VPOPCNTDQ: __m512i _mm512_popcnt_epi32(__m512i a) __m256 count = _mm256_setr_ps( - popcnt_32(_mm256_extract_epi32(xnor256, 0)), - popcnt_32(_mm256_extract_epi32(xnor256, 1)), - popcnt_32(_mm256_extract_epi32(xnor256, 2)), - popcnt_32(_mm256_extract_epi32(xnor256, 3)), - popcnt_32(_mm256_extract_epi32(xnor256, 4)), - popcnt_32(_mm256_extract_epi32(xnor256, 5)), - popcnt_32(_mm256_extract_epi32(xnor256, 6)), - popcnt_32(_mm256_extract_epi32(xnor256, 7))); + POPCNT(_mm256_extract_epi32(xnor256, 0)), + POPCNT(_mm256_extract_epi32(xnor256, 1)), + POPCNT(_mm256_extract_epi32(xnor256, 2)), + POPCNT(_mm256_extract_epi32(xnor256, 3)), + POPCNT(_mm256_extract_epi32(xnor256, 4)), + POPCNT(_mm256_extract_epi32(xnor256, 5)), + POPCNT(_mm256_extract_epi32(xnor256, 6)), + POPCNT(_mm256_extract_epi32(xnor256, 7))); __m256 val2 = _mm256_set1_ps(2); count = _mm256_mul_ps(count, val2); // count * 2 @@ -952,7 +954,7 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, { PUT_IN_REGISTER uint32_t B_PART = B[s*ldb + j]; uint32_t xnor_result = ~(A_PART ^ B_PART); - int32_t count 
= popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int C[i*ldc + j] += (2 * count - 32) * mean_val; } @@ -1140,13 +1142,7 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, static inline int popcnt128(__m128i n) { const __m128i n_hi = _mm_unpackhi_epi64(n, n); -#if defined(_MSC_VER) - return __popcnt64(_mm_cvtsi128_si64(n)) + __popcnt64(_mm_cvtsi128_si64(n_hi)); -#elif defined(__APPLE__) && defined(__clang__) - return _mm_popcnt_u64(_mm_cvtsi128_si64(n)) + _mm_popcnt_u64(_mm_cvtsi128_si64(n_hi)); -#else - return __popcntq(_mm_cvtsi128_si64(n)) + __popcntq(_mm_cvtsi128_si64(n_hi)); -#endif + return POPCNT64(_mm_cvtsi128_si64(n)) + POPCNT64(_mm_cvtsi128_si64(n_hi)); } static inline int popcnt256(__m256i n) { @@ -2021,7 +2017,7 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, PUT_IN_REGISTER uint32_t B_PART = B[s * ldb + j]; uint32_t xnor_result = ~(A_PART ^ B_PART); //printf(" xnor_result = %d, ", xnor_result); - int32_t count = popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int C[i*ldc + j] += (2 * count - 32) * mean_val; //c[i*n + j] += count*mean; @@ -2079,25 +2075,6 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, } } -static inline int popcnt_64(uint64_t val64) { -#ifdef WIN32 // Windows -#ifdef _WIN64 // Windows 64-bit - int tmp_count = __popcnt64(val64); -#else // Windows 32-bit - int tmp_count = __popcnt(val64); - tmp_count += __popcnt(val64 >> 32); -#endif -#else // Linux -#if defined(__x86_64__) || defined(__aarch64__) // Linux 64-bit - int tmp_count = __builtin_popcountll(val64); -#else // Linux 32-bit - int tmp_count = __builtin_popcount(val64); - tmp_count += __builtin_popcount(val64 >> 32); -#endif -#endif - return tmp_count; -} - void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, unsigned char *A, int lda, unsigned char *B, int ldb, @@ -2118,7 +2095,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); - int tmp_count = popcnt_64(c_bit64); + int tmp_count = POPCNT64(c_bit64); if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits count += tmp_count; @@ -2518,7 +2495,7 @@ void gemm_nn_bin_transposed_32bit_packed(int M, int N, int K, float ALPHA, PUT_IN_REGISTER uint32_t A_PART = ((uint32_t*)A)[i*lda + s]; PUT_IN_REGISTER uint32_t B_PART = ((uint32_t*)B)[j * ldb + s]; uint32_t xnor_result = ~(A_PART ^ B_PART); - int32_t count = popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int val += (2 * count - 32) * mean_val; } @@ -2581,7 +2558,7 @@ void convolution_repacked(uint32_t *packed_input, uint32_t *packed_weights, floa uint32_t weight = ((uint32_t *)packed_weights)[fil*new_lda / 32 + chan*size*size + f_y*size + f_x]; uint32_t xnor_result = ~(input ^ weight); - int32_t count = popcnt_32(xnor_result); // mandatory Signed int + int32_t count = POPCNT(xnor_result); // mandatory Signed int sum += (2 * count - 32) * mean_val; } } diff --git a/src/http_stream.cpp b/src/http_stream.cpp index 3ec7e851593..b17edfb5d36 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -70,8 +70,12 @@ static int close_socket(SOCKET s) { #define SOCKADDR struct sockaddr #define SOCKADDR_IN struct sockaddr_in #define ADDRPOINTER unsigned int* +#ifndef INVALID_SOCKET 
#define INVALID_SOCKET -1 +#endif +#ifndef SOCKET_ERROR #define SOCKET_ERROR -1 +#endif struct _IGNORE_PIPE_SIGNAL { struct sigaction new_actn, old_actn; @@ -934,4 +938,3 @@ void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim } } } - diff --git a/src/httplib.h b/src/httplib.h index 41fbfb19419..e4678faeeb2 100644 --- a/src/httplib.h +++ b/src/httplib.h @@ -126,7 +126,10 @@ using socket_t = SOCKET; #include using socket_t = int; -#define INVALID_SOCKET (-1) + +#ifndef INVALID_SOCKET +#define INVALID_SOCKET -1 +#endif #endif //_WIN32 #include diff --git a/vcpkg.json b/vcpkg.json index a7f66f260fc..c57fe9175bf 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,12 +1,11 @@ { "name": "darknet", - "version-string": "0.2.5.4", - "port-version": 2, - "homepage": "https://github.com/alexeyab/darknet", + "version-date": "2021-04-16", "description": "Darknet is an open source neural network framework written in C and CUDA. You only look once (YOLO) is a state-of-the-art, real-time object detection system, best example of darknet functionalities.", + "homepage": "https://github.com/alexeyab/darknet", "dependencies": [ - "stb", - "pthreads" + "pthreads", + "stb" ], "features": { "cuda": { @@ -22,6 +21,19 @@ "cudnn" ] }, + "full": { + "description": "Build darknet fully featured", + "dependencies": [ + { + "name": "darknet", + "features": [ + "cuda", + "cudnn", + "opencv-cuda" + ] + } + ] + }, "opencv-base": { "description": "Build darknet with support for latest version of OpenCV", "dependencies": [ From b6cd7592183043613f4b41c25e3385e7f752845c Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Sun, 18 Apr 2021 16:49:54 +0200 Subject: [PATCH 02/46] apply feedback from vcpkg ci --- src/gemm.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/gemm.c b/src/gemm.c index 84a7e9a6815..5f5c9689c67 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -23,8 +23,18 @@ static inline uint32_t popcnt(uint32_t v) { #define POPCNT64(x) (popcnt((unsigned)(x)) + popcnt((unsigned)((uint64_t)(x) >> 32))) #else #include +#ifdef _WIN64 #define POPCNT(x) __popcnt(x) #define POPCNT64(x) __popcnt64(x) +#else +static inline int popcnt_64(uint64_t val64) { + int tmp_count = __popcnt(val64); + tmp_count += __popcnt(val64 >> 32); + return tmp_count; +} +#define POPCNT(x) __popcnt(x) +#define POPCNT64(x) popcnt_64(x) +#endif #endif #elif defined(__GNUC__) #define POPCNT(x) __builtin_popcount(x) @@ -515,7 +525,7 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, } } -#if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && !defined(__MINGW32__)) +#if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && !defined(__MINGW32__) && !defined(_M_ARM64)) #if (defined(_WIN64) && !defined(__MINGW64__)) #include From 8bc512dca323ccace7c4ad7b702d9b30c2866c02 Mon Sep 17 00:00:00 2001 From: Renault Fernandes <32931110+renaultfernandes@users.noreply.github.com> Date: Tue, 20 Apr 2021 04:28:00 +0530 Subject: [PATCH 03/46] Fix no inference detections bug when cudnn is disabled. 
(#7617) Fixes issue described here: https://github.com/AlexeyAB/darknet/issues/7616 --- src/dark_cuda.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/dark_cuda.c b/src/dark_cuda.c index 30509c51ee4..a0afff493fe 100644 --- a/src/dark_cuda.c +++ b/src/dark_cuda.c @@ -123,8 +123,11 @@ cudaStream_t get_cuda_stream() { int i = cuda_get_device(); if (!streamInit[i]) { printf("Create CUDA-stream - %d \n", i); - //cudaError_t status = cudaStreamCreate(&streamsArray[i], cudaStreamNonBlocking); +#ifdef CUDNN cudaError_t status = cudaStreamCreateWithFlags(&streamsArray[i], cudaStreamNonBlocking); +#else + cudaError_t status = cudaStreamCreate(&streamsArray[i]); +#endif if (status != cudaSuccess) { printf(" cudaStreamCreate error: %d \n", status); const char *s = cudaGetErrorString(status); From 96971deaa79e69b6deb1c77f546bf44bee2bf3f6 Mon Sep 17 00:00:00 2001 From: Alexey Date: Wed, 21 Apr 2021 03:11:31 +0300 Subject: [PATCH 04/46] Update README.md --- README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1e839f8a44a..4a4a67f49f8 100644 --- a/README.md +++ b/README.md @@ -211,22 +211,25 @@ You can get cfg-files by path: `darknet/cfg/` * **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Android: https://github.com/hunglc007/tensorflow-yolov4-tflite Official TF models: https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/yolo For YOLOv4 - convert `yolov4.weights`/`cfg` files to `yolov4.pb` by using [TNTWEN](https://github.com/TNTWEN/OpenVINO-YOLOV4) project, and to `yolov4.tflite` [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) -* **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) -* **Intel OpenVINO 2021.2:** supports YOLOv4 (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): https://devmesh.intel.com/projects/openvino-yolov4-49c756 read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) -* **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn +* **OpenCV** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) +* **Intel OpenVINO 2021.2:** supports YOLOv4 (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): https://devmesh.intel.com/projects/openvino-yolov4-49c756 read this 
[manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) (for [Scaled-YOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-large) models use https://github.com/Chen-MingChang/pytorch_YOLO_OpenVINO_demo ) * **PyTorch > ONNX**: * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) * [YOLOv5](https://github.com/ultralytics/yolov5) * **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ -* **TensorRT** YOLOv4 on TensorRT+tkDNN: https://github.com/ceccocats/tkDNN - For YOLOv3 (-70% faster inference): [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) read [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). [jkjung-avt/tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos) or [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) implemented yolov3-spp, yolov4, etc. -* **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo +* **TensorRT+tkDNN**: https://github.com/ceccocats/tkDNN#fps-results +* **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo read [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) and [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). 
Additionally [jkjung-avt/tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos) or [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) * **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt +* **DirectML** https://github.com/microsoft/DirectML/tree/master/Samples/yolov4 +* **OpenCL** (Intel, AMD, Mali GPUs for macOS & GNU/Linux) https://github.com/sowson/darknet +* **HIP** for Training and Detection on AMD GPU https://github.com/os-hackathon/darknet +* **ROS** (Robot Operating System) https://github.com/engcang/ros-yolo-sort * **Xilinx Zynq Ultrascale+ Deep Learning Processor (DPU) ZCU102/ZCU104:** https://github.com/Xilinx/Vitis-In-Depth-Tutorial/tree/master/Machine_Learning/Design_Tutorials/07-yolov4-tutorial * **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) * **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backend (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about +* **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn * **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite * **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron From 91efb7c7f6c00a4616ed58735f7c670908d0a849 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Wed, 21 Apr 2021 18:25:46 +0200 Subject: [PATCH 05/46] Deploy CUDA 11.3 in CI (#7630) * [nuget] use personal baget repo * update cuda to 11.3 on windows * add error messages during build to not have green badge when failed * remove cygwin script, is untested now; use deploy-cuda.ps1 script also inside setup.ps1 --- .github/workflows/ccpp.yml | 77 +++++++++++++++----------- build.ps1 | 12 +++- scripts/README.md | 9 +-- scripts/deploy-cuda.ps1 | 26 +++++++++ scripts/setup.ps1 | 13 +++-- scripts/setup.sh | 6 ++ scripts/windows/win_install_cygwin.cmd | 12 ---- vcpkg.json | 2 - 8 files changed, 97 insertions(+), 60 deletions(-) create mode 100644 scripts/deploy-cuda.ps1 delete mode 100644 scripts/windows/win_install_cygwin.cmd diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index d6c18fda3fd..ac44c9f67f1 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -3,6 +3,7 @@ name: Darknet Continuous Integration on: [push, pull_request, workflow_dispatch] env: + #VCPKG_BINARY_SOURCES: 'clear;nuget,githubpackages,readwrite' VCPKG_BINARY_SOURCES: 'clear;nuget,vcpkgbinarycache,readwrite' jobs: @@ -107,15 +108,20 @@ jobs: - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > - git clone https://github.com/microsoft/vcpkg; - ./vcpkg/bootstrap-vcpkg.sh; + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "githubpackages" + -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) - sources add - -source "https://nuget.pkg.github.com/AlexeyAB/index.json" - -storepasswordincleartext - -name "vcpkgbinarycache" - -username "AlexeyAB" - -password "${{ 
secrets.GITHUB_TOKEN }}" + setapikey ${{ secrets.GITHUB_TOKEN }} + -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json - name: 'Build' shell: pwsh @@ -267,15 +273,20 @@ jobs: - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > - git clone https://github.com/microsoft/vcpkg; - ./vcpkg/bootstrap-vcpkg.sh; + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "githubpackages" + -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.GITHUB_TOKEN }} + -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) - sources add - -source "https://nuget.pkg.github.com/AlexeyAB/index.json" - -storepasswordincleartext - -name "vcpkgbinarycache" - -username "AlexeyAB" - -password "${{ secrets.GITHUB_TOKEN }}" + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json - name: 'Build' shell: pwsh @@ -353,15 +364,20 @@ jobs: - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > - git clone https://github.com/microsoft/vcpkg; - ./vcpkg/bootstrap-vcpkg.sh; + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "githubpackages" + -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; $(./vcpkg/vcpkg fetch nuget | tail -n 1) - sources add - -source "https://nuget.pkg.github.com/AlexeyAB/index.json" - -storepasswordincleartext - -name "vcpkgbinarycache" - -username "AlexeyAB" - -password "${{ secrets.GITHUB_TOKEN }}" + setapikey ${{ secrets.GITHUB_TOKEN }} + -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json - name: 'Build' shell: pwsh @@ -439,20 +455,15 @@ jobs: steps: - uses: actions/checkout@v2 - name: 'Install CUDA' - run: | - choco install cuda --version=10.2.89.20191206 -y - $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." 
- Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - refreshenv + run: ./scripts/deploy-cuda.ps1 - uses: lukka/get-cmake@latest - name: 'Build' env: - CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - CUDA_PATH_V10_2: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" + CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3\\bin\\nvcc.exe" shell: pwsh run: ./build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET diff --git a/build.ps1 b/build.ps1 index e147fb878ef..9574023efd6 100755 --- a/build.ps1 +++ b/build.ps1 @@ -393,8 +393,16 @@ New-Item -Path $build_folder -ItemType directory -Force Set-Location $build_folder $cmake_args = "-G `"$generator`" ${additional_build_setup} -S .." Write-Host "CMake args: $cmake_args" -Start-Process -NoNewWindow -Wait -FilePath $CMAKE_EXE -ArgumentList $cmake_args -Start-Process -NoNewWindow -Wait -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${number_of_build_workers} --target install" +$proc = Start-Process -NoNewWindow -Wait -PassThru -FilePath $CMAKE_EXE -ArgumentList $cmake_args +$exitCode = $proc.ExitCode +if (-not $exitCode -eq 0) { + Throw "Config failed! Exited with $exitCode." +} +$proc = Start-Process -NoNewWindow -Wait -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${number_of_build_workers} --target install" +$exitCode = $proc.ExitCode +if (-not $exitCode -eq 0) { + Throw "Config failed! Exited with $exitCode." 
+}
 Remove-Item DarknetConfig.cmake
 Remove-Item DarknetConfigVersion.cmake
 $dllfiles = Get-ChildItem ./${dllfolder}/*.dll
diff --git a/scripts/README.md b/scripts/README.md
index a641e673d0e..91b17224361 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,8 +1,6 @@
+# Datasets
-
-### Datasets:
-
-59.26TB of research data: http://academictorrents.com/
+59.26TB of research data: http://academictorrents.com/

 ImageNet Torrent (Stanford): http://academictorrents.com/browse.php?search=imagenet&page=0
@@ -54,7 +52,6 @@ Visual Question Answering: https://visualqa.org/download.html

 Large Movie Review Dataset: http://ai.stanford.edu/~amaas/data/sentiment/

----

 Wikipedia's List of datasets: https://en.wikipedia.org/wiki/List_of_datasets_for_machine-learning_research
@@ -71,4 +68,4 @@ Pedestrian DATASETs for Vision based Detection and Tracking: https://hemprasad.w

 TrackingNet: https://tracking-net.org/

-RGB, RGBD, Texture-mapped 3D mesh models: http://www.ycbbenchmarks.com/
\ No newline at end of file
+RGB, RGBD, Texture-mapped 3D mesh models: http://www.ycbbenchmarks.com/
diff --git a/scripts/deploy-cuda.ps1 b/scripts/deploy-cuda.ps1
new file mode 100644
index 00000000000..29847578d3e
--- /dev/null
+++ b/scripts/deploy-cuda.ps1
@@ -0,0 +1,26 @@
+#!/usr/bin/env pwsh
+
+$url = 'https://developer.download.nvidia.com/compute/cuda/11.3.0/network_installers/cuda_11.3.0_win10_network.exe'
+
+$CudaFeatures = 'nvcc_11.3 cuobjdump_11.3 nvprune_11.3 cupti_11.3 memcheck_11.3 nvdisasm_11.3 nvprof_11.3 ' + `
+'visual_studio_integration_11.3 visual_profiler_11.3 cublas_11.3 cublas_dev_11.3 ' + `
+'cudart_11.3 cufft_11.3 cufft_dev_11.3 curand_11.3 curand_dev_11.3 cusolver_11.3 cusolver_dev_11.3 ' + `
+'cusparse_11.3 cusparse_dev_11.3 npp_11.3 npp_dev_11.3 nvrtc_11.3 nvrtc_dev_11.3 nvml_dev_11.3 ' + `
+'occupancy_calculator_11.3 '
+
+try {
+  Write-Host 'Downloading CUDA...'
+  Invoke-WebRequest -Uri $url -OutFile "cuda_11.3.0_win10_network.exe"
+  Write-Host 'Installing CUDA...'
+  $proc = Start-Process -Wait -PassThru -FilePath "./cuda_11.3.0_win10_network.exe" -ArgumentList @('-s ' + $CudaFeatures)
+  $exitCode = $proc.ExitCode
+  if ($exitCode -eq 0) {
+    Write-Host 'Installation successful!'
+  }
+  else {
+    Throw "Installation failed! Exited with $exitCode."
+  }
+}
+catch {
+  Throw "Failed to install CUDA! 
$($_.Exception.Message)" +} diff --git a/scripts/setup.ps1 b/scripts/setup.ps1 index b846bab1656..c5c2ae22b21 100755 --- a/scripts/setup.ps1 +++ b/scripts/setup.ps1 @@ -6,15 +6,15 @@ if ($null -eq (Get-Command "choco.exe" -ErrorAction SilentlyContinue)) { # Download and install Chocolatey Set-ExecutionPolicy unrestricted -Scope CurrentUser Invoke-Expression ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')) - Write-Host "Please close and re-open powershell and then re-run setup.ps1 script" - Break + Throw "Please close and re-open powershell and then re-run setup.ps1 script" } Start-Process -FilePath "choco" -Verb runAs -ArgumentList " install -y cmake ninja powershell git vscode" Start-Process -FilePath "choco" -Verb runAs -ArgumentList " install -y visualstudio2019buildtools --package-parameters `"--add Microsoft.VisualStudio.Component.VC.CoreBuildTools --includeRecommended --includeOptional --passive --locale en-US --lang en-US`"" +Push-Location $PSScriptRoot if ($install_cuda) { - Start-Process -FilePath "choco" -Verb runAs -ArgumentList " install -y cuda" + & ./deploy-cuda.ps1 $features = "full" } else { @@ -26,7 +26,10 @@ else { } } -git.exe clone https://github.com/microsoft/vcpkg -Set-Location vcpkg +git.exe clone https://github.com/microsoft/vcpkg ../vcpkg +Set-Location ..\vcpkg .\bootstrap-vcpkg.bat -disableMetrics .\vcpkg.exe install darknet[${features}]:x64-windows +Pop-Location + +Write-Host "Darknet installed in $pwd\x64-windows\tools\darknet" -ForegroundColor Yellow diff --git a/scripts/setup.sh b/scripts/setup.sh index 12a769b8881..c33379e92ef 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -77,3 +77,9 @@ if [[ ! -v VCPKG_ROOT ]]; then fi $VCPKG_ROOT/vcpkg install darknet[${features}] + +if [[ "$OSTYPE" == "darwin"* ]]; then + echo "Darknet installed in $VCPKG_ROOT/installed/x64-osx/tools/darknet" +else + echo "Darknet installed in $VCPKG_ROOT/installed/x64-linux/tools/darknet" +fi diff --git a/scripts/windows/win_install_cygwin.cmd b/scripts/windows/win_install_cygwin.cmd deleted file mode 100644 index 6a2bda4a812..00000000000 --- a/scripts/windows/win_install_cygwin.cmd +++ /dev/null @@ -1,12 +0,0 @@ -echo Download file: https://www.cygwin.com/setup-x86_64.exe - - -setup-x86_64.exe -q -P dos2unix,wget,tar,untar,gzip,unzip,qawk,bzip2,git,vim,gcc-g++,make,grep,sed,find - - -# wget rawgit.com/transcode-open/apt-cyg/master/apt-cyg -# install apt-cyg /bin - -echo Finished - -pause \ No newline at end of file diff --git a/vcpkg.json b/vcpkg.json index c57fe9175bf..30caffb7f88 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -67,7 +67,6 @@ { "name": "opencv2", "features": [ - "contrib", "ffmpeg" ] } @@ -79,7 +78,6 @@ { "name": "opencv2", "features": [ - "contrib", "cuda", "ffmpeg" ] From 4f794aa15265d2a068d9f452e5efd9d7a3574524 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Thu, 22 Apr 2021 17:03:23 +0200 Subject: [PATCH 06/46] almost revert last PR (#7634) * [darknet] split pr/ci pipelines to use fast caching only when accessible * restore original pipeline names * restore original ccpp file name * speed up scripts avoiding -wait flag * restore fast cache in readonly * clean up scripts removing references to github packages, which are often down/broken --- .github/workflows/ccpp.yml | 21 +- .github/workflows/on_pr.yml | 459 ++++++++++++++++++++++++++++++++++++ build.ps1 | 6 +- scripts/deploy-cuda.ps1 | 3 +- 4 files changed, 466 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/on_pr.yml diff --git 
a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index ac44c9f67f1..dd8a98dd856 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -1,9 +1,8 @@ name: Darknet Continuous Integration -on: [push, pull_request, workflow_dispatch] +on: [push, workflow_dispatch] env: - #VCPKG_BINARY_SOURCES: 'clear;nuget,githubpackages,readwrite' VCPKG_BINARY_SOURCES: 'clear;nuget,vcpkgbinarycache,readwrite' jobs: @@ -111,12 +110,6 @@ jobs: git clone https://github.com/microsoft/vcpkg ; ./vcpkg/bootstrap-vcpkg.sh ; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -Name "githubpackages" - -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; - mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) - setapikey ${{ secrets.GITHUB_TOKEN }} - -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; - mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add -Name "vcpkgbinarycache" -Source http://93.49.111.10:5555/v3/index.json ; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) @@ -276,12 +269,6 @@ jobs: git clone https://github.com/microsoft/vcpkg ; ./vcpkg/bootstrap-vcpkg.sh ; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -Name "githubpackages" - -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; - mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) - setapikey ${{ secrets.GITHUB_TOKEN }} - -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; - mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add -Name "vcpkgbinarycache" -Source http://93.49.111.10:5555/v3/index.json ; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) @@ -367,12 +354,6 @@ jobs: git clone https://github.com/microsoft/vcpkg ; ./vcpkg/bootstrap-vcpkg.sh ; $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -Name "githubpackages" - -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; - $(./vcpkg/vcpkg fetch nuget | tail -n 1) - setapikey ${{ secrets.GITHUB_TOKEN }} - -Source https://nuget.pkg.github.com/${{ github.repository_owner }} ; - $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add -Name "vcpkgbinarycache" -Source http://93.49.111.10:5555/v3/index.json ; $(./vcpkg/vcpkg fetch nuget | tail -n 1) diff --git a/.github/workflows/on_pr.yml b/.github/workflows/on_pr.yml new file mode 100644 index 00000000000..198d84fc4e0 --- /dev/null +++ b/.github/workflows/on_pr.yml @@ -0,0 +1,459 @@ +name: Darknet Pull Requests + +on: [pull_request] + +env: + VCPKG_BINARY_SOURCES: 'clear;nuget,vcpkgbinarycache,read' + +jobs: + ubuntu-makefile: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - name: 'Install CUDA' + run: | + sudo apt update + sudo apt-get dist-upgrade -y + sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin + sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" + sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 + sudo apt-get install -y --no-install-recommends libcudnn8-dev + sudo rm -rf /usr/local/cuda + sudo ln -s 
/usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + nvcc --version + gcc --version + + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 DEBUG=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 DEBUG=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 AVX=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 AVX=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 USE_CPP=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 USE_CPP=1 -j 8 + make clean + + + ubuntu-vcpkg-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + run: | + sudo apt update + sudo apt-get dist-upgrade -y + sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin + sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" + sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 + sudo apt-get install -y --no-install-recommends libcudnn8-dev + sudo rm -rf /usr/local/cuda + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + nvcc --version + gcc --version + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name 
"vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-cuda-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET + + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + run: | + sudo apt update + sudo apt-get dist-upgrade -y + sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin + sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" + sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 + sudo apt-get install -y --no-install-recommends libcudnn8-dev + sudo rm -rf /usr/local/cuda + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + nvcc --version + gcc --version + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: 
"/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-cuda-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + ubuntu-no-ocv-cpp: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + + + osx-vcpkg: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: brew install libomp yasm nasm + + - uses: lukka/get-cmake@latest + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -DisableInteractive -DoNotUpdateDARKNET + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + osx: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: brew install opencv libomp + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET + + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: data + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/*dark* + - uses: actions/upload-artifact@v2 + with: + name: darknet-${{ runner.os }} + path: ${{ github.workspace }}/uselib* + + + osx-no-ocv-no-omp-cpp: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + + + win-vcpkg: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET + + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + path: cfg + - uses: actions/upload-artifact@v2 + with: + name: darknet-vcpkg-${{ runner.os }} + 
path: data
+      - uses: actions/upload-artifact@v2
+        with:
+          name: darknet-vcpkg-${{ runner.os }}
+          path: ${{ github.workspace }}/*dark*
+      - uses: actions/upload-artifact@v2
+        with:
+          name: darknet-vcpkg-${{ runner.os }}
+          path: ${{ runner.workspace }}/buildDirectory/Release/*.dll
+      - uses: actions/upload-artifact@v2
+        with:
+          name: darknet-vcpkg-${{ runner.os }}
+          path: ${{ github.workspace }}/uselib*
+
+
+  win-intlibs:
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: lukka/get-cmake@latest
+
+      - name: 'Build'
+        shell: pwsh
+        run: ./build.ps1 -DisableInteractive -DoNotUpdateDARKNET
+
+      - uses: actions/upload-artifact@v2
+        with:
+          name: darknet-${{ runner.os }}
+          path: cfg
+      - uses: actions/upload-artifact@v2
+        with:
+          name: darknet-${{ runner.os }}
+          path: data
+      - uses: actions/upload-artifact@v2
+        with:
+          name: darknet-${{ runner.os }}
+          path: ${{ github.workspace }}/*dark*
+      - uses: actions/upload-artifact@v2
+        with:
+          name: darknet-${{ runner.os }}
+          path: ${{ github.workspace }}/3rdparty/pthreads/bin/*.dll
+      - uses: actions/upload-artifact@v2
+        with:
+          name: darknet-${{ runner.os }}
+          path: ${{ github.workspace }}/uselib*
+
+
+  win-intlibs-cpp:
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: lukka/get-cmake@latest
+
+      - name: 'Build'
+        shell: pwsh
+        run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET
+
+
+  win-intlibs-cuda:
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: 'Install CUDA'
+        run: ./scripts/deploy-cuda.ps1
+
+      - uses: lukka/get-cmake@latest
+
+      - name: 'Build'
+        env:
+          CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3"
+          CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3"
+          CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3\\bin\\nvcc.exe"
+        shell: pwsh
+        run: ./build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET
+
+
+  mingw:
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: lukka/get-cmake@latest
+
+      - name: 'Build with CMake'
+        uses: lukka/run-cmake@v3
+        with:
+          cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
+          cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt'
+          useVcpkgToolchainFile: true
+          buildDirectory: '${{ runner.workspace }}/buildDirectory'
+          cmakeAppendedArgs: "-G\"MinGW Makefiles\" -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=OFF -DENABLE_CUDNN=OFF -DENABLE_OPENCV=OFF"
+          cmakeBuildType: 'Release'
+          buildWithCMakeArgs: '--config Release --target install'
diff --git a/build.ps1 b/build.ps1
index 9574023efd6..8cf090f0a12 100755
--- a/build.ps1
+++ b/build.ps1
@@ -393,12 +393,14 @@ New-Item -Path $build_folder -ItemType directory -Force
 Set-Location $build_folder
 $cmake_args = "-G `"$generator`" ${additional_build_setup} -S .."
 Write-Host "CMake args: $cmake_args"
-$proc = Start-Process -NoNewWindow -Wait -PassThru -FilePath $CMAKE_EXE -ArgumentList $cmake_args
+$proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList $cmake_args
+$proc.WaitForExit()
 $exitCode = $proc.ExitCode
 if ($exitCode -ne 0) {
   Throw "Config failed! Exited with $exitCode."
 }
-$proc = Start-Process -NoNewWindow -Wait -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${number_of_build_workers} --target install"
+$proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${number_of_build_workers} --target install"
+$proc.WaitForExit()
 $exitCode = $proc.ExitCode
 if ($exitCode -ne 0) {
   Throw "Build failed! Exited with $exitCode."
diff --git a/scripts/deploy-cuda.ps1 b/scripts/deploy-cuda.ps1
index 29847578d3e..83b64d29559 100644
--- a/scripts/deploy-cuda.ps1
+++ b/scripts/deploy-cuda.ps1
@@ -12,7 +12,8 @@ try {
   Write-Host 'Downloading CUDA...'
   Invoke-WebRequest -Uri $url -OutFile "cuda_11.3.0_win10_network.exe"
   Write-Host 'Installing CUDA...'
-  $proc = Start-Process -Wait -PassThru -FilePath "./cuda_11.3.0_win10_network.exe" -ArgumentList @('-s ' + $CudaFeatures)
+  $proc = Start-Process -PassThru -FilePath "./cuda_11.3.0_win10_network.exe" -ArgumentList @('-s ' + $CudaFeatures)
+  $proc.WaitForExit()
   $exitCode = $proc.ExitCode
   if ($exitCode -eq 0) {
     Write-Host 'Installation successful!'

From c9f2c5f0e9b65a5bf12b5110851489e9159f1988 Mon Sep 17 00:00:00 2001
From: deoksangkim <49770966+deoksangkim@users.noreply.github.com>
Date: Mon, 26 Apr 2021 06:58:22 +0900
Subject: [PATCH 07/46] fix memory error in batchnorm layer (#7619)

---
 src/batchnorm_layer.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c
index eeba5cc57b9..6729b031923 100644
--- a/src/batchnorm_layer.c
+++ b/src/batchnorm_layer.c
@@ -36,6 +36,12 @@ layer make_batchnorm_layer(int batch, int w, int h, int c, int train)
     layer.rolling_mean = (float*)xcalloc(c, sizeof(float));
     layer.rolling_variance = (float*)xcalloc(c, sizeof(float));

+    layer.mean_delta = (float*)xcalloc(c, sizeof(float));
+    layer.variance_delta = (float*)xcalloc(c, sizeof(float));
+
+    layer.x = (float*)xcalloc(layer.batch*layer.outputs, sizeof(float));
+    layer.x_norm = (float*)xcalloc(layer.batch*layer.outputs, sizeof(float));
+
     layer.forward = forward_batchnorm_layer;
     layer.backward = backward_batchnorm_layer;
     layer.update = update_batchnorm_layer;

From 3a0dc55cada5c04d4a7e5292d4dcc1e429609611 Mon Sep 17 00:00:00 2001
From: Renault Fernandes <32931110+renaultfernandes@users.noreply.github.com>
Date: Mon, 26 Apr 2021 03:29:41 +0530
Subject: [PATCH 08/46] Use cublasStatus_t instead of cudaError_t when error checking cublas calls (#7626)

cuBLAS success is indicated by CUBLAS_STATUS_SUCCESS; this is the value that
must be used to determine whether a cuBLAS function call succeeded. Although
CUDA does not complain if we use cudaError_t, when building with ROCm support
it is absolutely necessary to use the correct enum (cublasStatus_t); otherwise
ROCm will always interpret the returned status as an error and crash.
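As a minimal, self-contained illustration of the pattern this message describes
(the helper cublas_check and the macro CHECK_CUBLAS_DEMO below are hypothetical
stand-ins for darknet's cublas_check_error_extended and CHECK_CUBLAS, and the
cublasSasum call stands in for the real gemm call sites; only documented cuBLAS
and CUDA runtime functions are used):

    #include <stdio.h>
    #include <stdlib.h>
    #include <cuda_runtime.h>
    #include <cublas_v2.h>

    /* Hypothetical analogue of CHECK_CUBLAS: the wrapper parameter is typed
       cublasStatus_t, so a cuBLAS return value can no longer be passed
       through a cudaError_t-typed checker without an explicit cast. */
    static void cublas_check(cublasStatus_t status, const char *file, int line)
    {
        if (status != CUBLAS_STATUS_SUCCESS) {
            fprintf(stderr, "cuBLAS error %d at %s:%d\n", (int)status, file, line);
            exit(EXIT_FAILURE);
        }
    }
    #define CHECK_CUBLAS_DEMO(x) cublas_check((x), __FILE__, __LINE__)

    int main(void)
    {
        cublasHandle_t handle;
        CHECK_CUBLAS_DEMO(cublasCreate(&handle));  /* returns cublasStatus_t */

        float *d_x = NULL;
        if (cudaMalloc((void **)&d_x, 4 * sizeof(float)) != cudaSuccess) return 1;
        if (cudaMemset(d_x, 0, 4 * sizeof(float)) != cudaSuccess) return 1;

        float result = 0.f;
        /* cublasSasum also returns cublasStatus_t, not cudaError_t */
        CHECK_CUBLAS_DEMO(cublasSasum(handle, 4, d_x, 1, &result));

        cudaFree(d_x);
        cublasDestroy(handle);
        return 0;
    }

With this typing in place, mixing the two status enums becomes a compile-time
nuisance rather than a silent runtime misinterpretation, which is exactly the
accidental mixing the diff below removes.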
---
 src/dark_cuda.c | 33 +++++++++++++++++++++++++++++++--
 src/dark_cuda.h |  2 ++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/dark_cuda.c b/src/dark_cuda.c
index a0afff493fe..ceb43c8878b 100644
--- a/src/dark_cuda.c
+++ b/src/dark_cuda.c
@@ -239,6 +239,35 @@ static int switchCudnnInit[16];
 #endif

+void cublas_check_error(cublasStatus_t status)
+{
+#if defined(DEBUG) || defined(CUDA_DEBUG)
+    cudaDeviceSynchronize();
+#endif
+    if (cuda_debug_sync) {
+        cudaDeviceSynchronize();
+    }
+    if (status != CUBLAS_STATUS_SUCCESS) {
+        printf("cuBLAS Error\n");
+    }
+}
+
+void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time)
+{
+    if (status != CUBLAS_STATUS_SUCCESS) {
+        printf("\n cuBLAS status Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+    }
+#if defined(DEBUG) || defined(CUDA_DEBUG)
+    cuda_debug_sync = 1;
+#endif
+    if (cuda_debug_sync) {
+        cudaError_t cuda_status = cudaDeviceSynchronize();
+        if (cuda_status != cudaSuccess)
+            printf("\n cudaError_t cuda_status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+    }
+    cublas_check_error(status);
+}
+
 static int blasInit[16] = { 0 };
 static cublasHandle_t blasHandle[16];

@@ -246,9 +275,9 @@ cublasHandle_t blas_handle()
 {
     int i = cuda_get_device();
     if (!blasInit[i]) {
-        cublasCreate(&blasHandle[i]);
+        CHECK_CUBLAS(cublasCreate(&blasHandle[i]));
         cublasStatus_t status = cublasSetStream(blasHandle[i], get_cuda_stream());
-        CHECK_CUDA((cudaError_t)status);
+        CHECK_CUBLAS(status);
         blasInit[i] = 1;
     }
     return blasHandle[i];
diff --git a/src/dark_cuda.h b/src/dark_cuda.h
index 10f6d89e385..9251e877672 100644
--- a/src/dark_cuda.h
+++ b/src/dark_cuda.h
@@ -56,7 +56,9 @@ extern "C" {
 #endif // __cplusplus
 void check_error(cudaError_t status);
 void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time);
+void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time);
 #define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );
+#define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );

 cublasHandle_t blas_handle();
 void free_pinned_memory();

From 5c927228de38f4a57256c63944923f1a41f391d5 Mon Sep 17 00:00:00 2001
From: Martin
Date: Mon, 26 Apr 2021 00:01:33 +0200
Subject: [PATCH 09/46] Fix python video (#7611)

* some bbox conversion functions

* output video in original resolution

* fixing drawing of bboxes into orig sized video

---
 darknet_video.py | 71 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 11 deletions(-)

diff --git a/darknet_video.py b/darknet_video.py
index cc20b266606..04895133f97 100644
--- a/darknet_video.py
+++ b/darknet_video.py
@@ -60,16 +60,61 @@ def set_saved_video(input_video, output_video, size):
     return video


+def convert2relative(bbox):
+    """
+    YOLO format uses relative coordinates for annotation
+    """
+    x, y, w, h = bbox
+    _height = darknet_height
+    _width = darknet_width
+    return x/_width, y/_height, w/_width, h/_height
+
+
+def convert2original(image, bbox):
+    x, y, w, h = convert2relative(bbox)
+
+    image_h, image_w, __ = image.shape
+
+    orig_x = int(x * image_w)
+    orig_y = int(y * image_h)
+    orig_width = int(w * image_w)
+    orig_height = int(h * image_h)
+
+    bbox_converted = (orig_x, orig_y, orig_width, orig_height)
+
+    return bbox_converted
+
+
+def convert4cropping(image, bbox):
+    x, y, w, h = convert2relative(bbox)
+
+    image_h, image_w, __ = image.shape
+
+    orig_left = int((x - w / 2.) * image_w)
+    orig_right = int((x + w / 2.) * image_w)
+    orig_top = int((y - h / 2.) * image_h)
+    orig_bottom = int((y + h / 2.) * image_h)
+
+    if (orig_left < 0): orig_left = 0
+    if (orig_right > image_w - 1): orig_right = image_w - 1
+    if (orig_top < 0): orig_top = 0
+    if (orig_bottom > image_h - 1): orig_bottom = image_h - 1
+
+    bbox_cropping = (orig_left, orig_top, orig_right, orig_bottom)
+
+    return bbox_cropping
+
+
 def video_capture(frame_queue, darknet_image_queue):
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        frame_resized = cv2.resize(frame_rgb, (width, height),
+        frame_resized = cv2.resize(frame_rgb, (darknet_width, darknet_height),
                                    interpolation=cv2.INTER_LINEAR)
-        frame_queue.put(frame_resized)
-        img_for_detect = darknet.make_image(width, height, 3)
+        frame_queue.put(frame)
+        img_for_detect = darknet.make_image(darknet_width, darknet_height, 3)
         darknet.copy_image_from_bytes(img_for_detect, frame_resized.tobytes())
         darknet_image_queue.put(img_for_detect)
     cap.release()
@@ -91,18 +136,22 @@ def inference(darknet_image_queue, detections_queue, fps_queue):

 def drawing(frame_queue, detections_queue, fps_queue):
     random.seed(3)  # deterministic bbox colors
-    video = set_saved_video(cap, args.out_filename, (width, height))
+    video = set_saved_video(cap, args.out_filename, (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
     while cap.isOpened():
-        frame_resized = frame_queue.get()
+        frame = frame_queue.get()
         detections = detections_queue.get()
         fps = fps_queue.get()
-        if frame_resized is not None:
-            image = darknet.draw_boxes(detections, frame_resized, class_colors)
+        detections_adjusted = []
+        if frame is not None:
+            for label, confidence, bbox in detections:
+                bbox_adjusted = convert2original(frame, bbox)
+                detections_adjusted.append((str(label), confidence, bbox_adjusted))
+            image = darknet.draw_boxes(detections_adjusted, frame, class_colors)
+            if not args.dont_show:
+                cv2.imshow('Inference', image)
             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
             if args.out_filename is not None:
                 video.write(image)
-            if not args.dont_show:
-                cv2.imshow('Inference', image)
             if cv2.waitKey(fps) == 27:
                 break
     cap.release()
@@ -124,8 +173,8 @@ def drawing(frame_queue, detections_queue, fps_queue):
         args.weights,
         batch_size=1
     )
-    width = darknet.network_width(network)
-    height = darknet.network_height(network)
+    darknet_width = darknet.network_width(network)
+    darknet_height = darknet.network_height(network)
     input_path = str2int(args.input)
     cap = cv2.VideoCapture(input_path)
     Thread(target=video_capture, args=(frame_queue, darknet_image_queue)).start()

From 98fa47c844d76e7a15f0f3f2bcc94fd222fcf78b Mon Sep 17 00:00:00 2001
From: Juan Toca Mateo
Date: Mon, 26 Apr 2021 00:01:52 +0200
Subject: [PATCH 10/46] Fixed relative paths in darknet_images.py (#7566)

---
 darknet_images.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/darknet_images.py b/darknet_images.py
index 0f1fca9b7ae..17ac91731a7 100644
--- a/darknet_images.py
+++ b/darknet_images.py
@@ -162,7 +162,7 @@ def save_annotations(name, image, detections, class_names):
     """
     Files saved with image_name.txt and relative coordinates
     """
-    file_name = name.split(".")[:-1][0] + ".txt"
+    file_name = os.path.splitext(name)[0] + ".txt"
     with open(file_name, "w") as f:
         for label, confidence, bbox in detections:
             x, y, w, h = convert2relative(image, bbox)

From 
e2a128737bcee224088a0076629983e2a4beca01 Mon Sep 17 00:00:00 2001 From: ILLO YOON Date: Mon, 26 Apr 2021 07:05:41 +0900 Subject: [PATCH 11/46] gaussian_yolo_layer truth size mismatch fixed due to the update of track_id (#7578) --- src/gaussian_yolo_layer.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index bfef6991634..bd99a89dc6b 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -51,7 +51,8 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m l.outputs = h*w*n*(classes + 8 + 1); l.inputs = l.outputs; l.max_boxes = max_boxes; - l.truths = l.max_boxes*(4 + 1); + l.truth_size = 4 + 2; + l.truths = l.max_boxes*l.truth_size; l.delta = (float*)calloc(batch*l.outputs, sizeof(float)); l.output = (float*)calloc(batch*l.outputs, sizeof(float)); for(i = 0; i < total*2; ++i){ @@ -464,8 +465,8 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) float best_iou = 0; int best_t = 0; for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + box truth = float_to_box_stride(state.truth + t*l.truth_size + b*l.truths, 1); + int class_id = state.truth[t*l.truth_size + b*l.truths + 4]; if (class_id >= l.classes) { printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); printf(" truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id); @@ -496,7 +497,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if (l.objectness_smooth) { l.delta[obj_index] = l.obj_normalizer * (iou_multiplier - l.output[obj_index]); - int class_id = state.truth[best_match_t*(4 + 1) + b*l.truths + 4]; + int class_id = state.truth[best_match_t*l.truth_size + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); } @@ -518,19 +519,19 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) else l.delta[obj_index] = l.obj_normalizer * (1 - l.output[obj_index]); //l.delta[obj_index] = l.obj_normalizer * (1 - l.output[obj_index]); - int class_id = state.truth[best_t*(4 + 1) + b*l.truths + 4]; + int class_id = state.truth[best_t*l.truth_size + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); const float class_multiplier = (l.classes_multipliers) ? 
l.classes_multipliers[class_id] : 1.0f; if (l.objectness_smooth) l.delta[class_index + stride*class_id] = class_multiplier * (iou_multiplier - l.output[class_index + stride*class_id]); - box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); + box truth = float_to_box_stride(state.truth + best_t*l.truth_size + b*l.truths, 1); delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta); } } } } for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); + box truth = float_to_box_stride(state.truth + t*l.truth_size + b*l.truths, 1); if(!truth.x) break; float best_iou = 0; @@ -564,7 +565,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if(mask_n >= 0){ - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + int class_id = state.truth[t*l.truth_size + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); @@ -597,7 +598,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) // iou, n if (iou > l.iou_thresh) { - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + int class_id = state.truth[t*l.truth_size + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); From 846c79b6d4cf78f27fa4adb5341ad88cf581f0c3 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 11 May 2021 22:59:21 +0300 Subject: [PATCH 12/46] Added [empty]/[silence] and [implicit] layers --- Makefile | 2 +- build/darknet/darknet.vcxproj | 2 + .../x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg | 1429 +++++++++++++++++ build/darknet/x64/darknet.py | 2 + build/darknet/x64/darknet_python.cmd | 2 +- build/darknet/x64/partial.cmd | 5 +- build/darknet/yolo_cpp_dll.vcxproj | 8 +- cfg/yolov4-sam-mish-csp-reorg-bfm.cfg | 1429 +++++++++++++++++ include/darknet.h | 3 +- src/blas.h | 3 + src/blas_kernels.cu | 35 +- src/parser.c | 76 +- src/representation_layer.c | 160 ++ src/representation_layer.h | 29 + 14 files changed, 3173 insertions(+), 12 deletions(-) create mode 100644 build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg create mode 100644 cfg/yolov4-sam-mish-csp-reorg-bfm.cfg create mode 100644 src/representation_layer.c create mode 100644 src/representation_layer.h diff --git a/Makefile b/Makefile index 5fb70543746..431933ca058 100644 --- a/Makefile +++ b/Makefile @@ -151,7 +151,7 @@ LDFLAGS+= -L/usr/local/zed/lib -lsl_zed endif endif -OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o 
+OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o representation_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index 5282de26d91..5c8a7c3e1b9 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -223,6 +223,7 @@ + @@ -286,6 +287,7 @@ + diff --git a/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg b/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg new file mode 100644 index 00000000000..1461d88838e --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg @@ -0,0 +1,1429 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] 
+from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=800 + + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -7 + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -10 +activation=linear + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 17 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -19 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +#### BFM-1 + +[route] +layers = 17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=256 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + 
+ +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 137 +#layers = -1, -20 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 126 +# layers = -1, -49 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 
+beta1=0.6 +max_delta=5 diff --git a/build/darknet/x64/darknet.py b/build/darknet/x64/darknet.py index bb0ee183688..23539e0b64d 100644 --- a/build/darknet/x64/darknet.py +++ b/build/darknet/x64/darknet.py @@ -25,6 +25,8 @@ import random import os +print("Run: darknet_images.py or:\n") +print("python.exe darknet_video.py --data_file cfg/coco.data --config_file cfg/yolov4.cfg --weights yolov4.weights --input test.mp4 \n") class BOX(Structure): _fields_ = [("x", c_float), diff --git a/build/darknet/x64/darknet_python.cmd b/build/darknet/x64/darknet_python.cmd index b2df11d8207..74556582e82 100644 --- a/build/darknet/x64/darknet_python.cmd +++ b/build/darknet/x64/darknet_python.cmd @@ -14,6 +14,6 @@ rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install sci rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install scipy rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install opencv-python -C:\Users\Alex\AppData\Local\Programs\Python\Python36\python.exe darknet.py +C:\Users\Alex\AppData\Local\Programs\Python\Python36\python.exe darknet_images.py pause \ No newline at end of file diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd index 26a701a9c40..a9b06ca86b7 100644 --- a/build/darknet/x64/partial.cmd +++ b/build/darknet/x64/partial.cmd @@ -8,7 +8,10 @@ rem darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-vo darknet.exe partial cfg/yolov4-tiny.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29 -darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.137 137 +rem darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.137 137 + +rem darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.105 105 + pause diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index 4bd09206caa..35742aa2be4 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -52,7 +52,7 @@ - + @@ -155,7 +155,7 @@ 64 - compute_30,sm_30;compute_75,sm_75 + compute_35,sm_35;compute_75,sm_75 @@ -225,6 +225,7 @@ + @@ -290,6 +291,7 @@ + @@ -306,6 +308,6 @@ - + \ No newline at end of file diff --git a/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg b/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg new file mode 100644 index 00000000000..1461d88838e --- /dev/null +++ b/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg @@ -0,0 +1,1429 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=800 + + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -7 + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -10 +activation=linear + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 17 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -19 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +#### BFM-1 + +[route] +layers = 17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + 
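+# Note on the BFM block above: [reorg3d] with stride=2 is a space-to-depth
+# rearrangement - it halves W and H and multiplies the channel count by
+# stride*stride - so the high-resolution features routed from layer 17 reach
+# the spatial size of the current map and can be fused by [route] layers = -1, -3.
+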
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=256 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 137 +#layers = -1, -20 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 126 +# layers = -1, -49 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + 
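+# CSP-style split: the [route] layers = -2 a few layers above forked off a
+# parallel branch; the two paths are fused again below by [route] layers = -1,-6,
+# followed by the logistic-gated [sam] attention and the final 1x1 detection
+# convolution (filters=255).
+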
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 diff --git a/include/darknet.h b/include/darknet.h index 1a5fdf75586..f5170f375cd 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -192,7 +192,8 @@ typedef enum { L2NORM, EMPTY, BLANK, - CONTRASTIVE + CONTRASTIVE, + IMPLICIT } LAYER_TYPE; // layer.h diff --git a/src/blas.h b/src/blas.h index ab888903b54..b69a702fa93 100644 --- a/src/blas.h +++ b/src/blas.h @@ -174,6 +174,9 @@ void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, floa void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size); void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type); +void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu); +void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float *delta_gpu); + #endif // GPU #ifdef __cplusplus } diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index bc327995eed..21dbfbba575 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -2435,4 +2435,37 @@ extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int coord_conv_kernel << > > (dst, w, h, chan, b, type); CHECK_CUDA(cudaPeekAtLastError()); -} \ No newline at end of file +} + + +__global__ void forward_implicit_kernel(int size, int batch, int nweights, float *weight_gpu, float *output_gpu) +{ + const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + + output_gpu[id] = weight_gpu[id % nweights]; +} + +extern "C" void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu) +{ + int size = batch * nweights; + forward_implicit_kernel << > > (size, batch, nweights, weight_gpu, output_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} + + + +__global__ void backward_implicit_kernel(int size, int batch, int nweights, float *weight_updates_gpu, float *delta_gpu) +{ + const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + + weight_updates_gpu[id % nweights] += delta_gpu[id]; +} + +extern "C" void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float *delta_gpu) +{ + int size = batch * nweights; + backward_implicit_kernel << > > (size, batch, nweights, weight_updates_gpu, delta_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} diff --git a/src/parser.c b/src/parser.c index e7498d9daee..8f8f584268e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -39,6 +39,11 @@ #include "version.h" #include 
"yolo_layer.h" #include "gaussian_yolo_layer.h" +#include "representation_layer.h" + +void empty_func(dropout_layer l, network_state state) { + //l.output_gpu = state.input; +} typedef struct{ char *type; @@ -90,7 +95,9 @@ LAYER_TYPE string_to_layer_type(char * type) if (strcmp(type, "[contrastive]") == 0) return CONTRASTIVE; if (strcmp(type, "[route]")==0) return ROUTE; if (strcmp(type, "[upsample]") == 0) return UPSAMPLE; - if (strcmp(type, "[empty]") == 0) return EMPTY; + if (strcmp(type, "[empty]") == 0 + || strcmp(type, "[silence]") == 0) return EMPTY; + if (strcmp(type, "[implicit]") == 0) return IMPLICIT; return BLANK; } @@ -1036,6 +1043,17 @@ layer parse_sam(list *options, size_params params, network net) return s; } +layer parse_implicit(list *options, size_params params, network net) +{ + float mean_init = option_find_float(options, "mean", 0.0); + float std_init = option_find_float(options, "std", 0.2); + int filters = option_find_int(options, "filters", 128); + int atoms = option_find_int_quiet(options, "atoms", 1); + + layer s = make_implicit_layer(params.batch, params.index, mean_init, std_init, filters, atoms); + + return s; +} layer parse_activation(list *options, size_params params) { @@ -1480,6 +1498,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) net.layers[count - 1].use_bin_output = 0; net.layers[l.index].use_bin_output = 0; net.layers[l.index].keep_delta_gpu = 1; + } else if (lt == IMPLICIT) { + l = parse_implicit(options, params, net); }else if(lt == DROPOUT){ l = parse_dropout(options, params); l.output = net.layers[count-1].output; @@ -1492,16 +1512,25 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) } else if (lt == EMPTY) { layer empty_layer = {(LAYER_TYPE)0}; - empty_layer.out_w = params.w; - empty_layer.out_h = params.h; - empty_layer.out_c = params.c; l = empty_layer; + l.type = EMPTY; + l.w = l.out_w = params.w; + l.h = l.out_h = params.h; + l.c = l.out_c = params.c; + l.batch = params.batch; + l.inputs = l.outputs = params.inputs; l.output = net.layers[count - 1].output; l.delta = net.layers[count - 1].delta; + l.forward = empty_func; + l.backward = empty_func; #ifdef GPU l.output_gpu = net.layers[count - 1].output_gpu; l.delta_gpu = net.layers[count - 1].delta_gpu; + l.keep_delta_gpu = 1; + l.forward_gpu = empty_func; + l.backward_gpu = empty_func; #endif + fprintf(stderr, "empty \n"); }else{ fprintf(stderr, "Type not recognized: %s\n", s->type); } @@ -1604,6 +1633,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); option_unused(options); + net.layers[count] = l; if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; if (l.inputs > max_inputs) max_inputs = l.inputs; @@ -1810,6 +1840,24 @@ void save_shortcut_weights(layer l, FILE *fp) fwrite(l.weights, sizeof(float), num, fp); } +void save_implicit_weights(layer l, FILE *fp) +{ +#ifdef GPU + if (gpu_index >= 0) { + pull_implicit_layer(l); + //printf("\n pull_implicit_layer \n"); + } +#endif + int i; + //if(l.weight_updates) for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]); + //printf(" l.nweights = %d - update \n", l.nweights); + //for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); + //printf(" l.nweights = %d \n\n", l.nweights); + + int num = l.nweights; + fwrite(l.weights, sizeof(float), num, fp); +} + void 
save_convolutional_weights(layer l, FILE *fp) { if(l.binary){ @@ -1921,6 +1969,8 @@ void save_weights_upto(network net, char *filename, int cutoff, int save_ema) } } if (l.type == SHORTCUT && l.nweights > 0) { save_shortcut_weights(l, fp); + } if (l.type == IMPLICIT) { + save_implicit_weights(l, fp); } if(l.type == CONNECTED){ save_connected_weights(l, fp); } if(l.type == BATCHNORM){ @@ -2131,6 +2181,21 @@ void load_shortcut_weights(layer l, FILE *fp) #endif } +void load_implicit_weights(layer l, FILE *fp) +{ + int num = l.nweights; + int read_bytes; + read_bytes = fread(l.weights, sizeof(float), num, fp); + if (read_bytes > 0 && read_bytes < num) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index); + //for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); + //printf(" read_bytes = %d \n\n", read_bytes); +#ifdef GPU + if (gpu_index >= 0) { + push_implicit_layer(l); + } +#endif +} + void load_weights_upto(network *net, char *filename, int cutoff) { #ifdef GPU @@ -2175,6 +2240,9 @@ void load_weights_upto(network *net, char *filename, int cutoff) if (l.type == SHORTCUT && l.nweights > 0) { load_shortcut_weights(l, fp); } + if (l.type == IMPLICIT) { + load_implicit_weights(l, fp); + } if(l.type == CONNECTED){ load_connected_weights(l, fp, transpose); } diff --git a/src/representation_layer.c b/src/representation_layer.c new file mode 100644 index 00000000000..a6cc5c5f3f8 --- /dev/null +++ b/src/representation_layer.c @@ -0,0 +1,160 @@ +#include "representation_layer.h" +#include "utils.h" +#include "dark_cuda.h" +#include "blas.h" +#include +#include + +layer make_implicit_layer(int batch, int index, float mean_init, float std_init, int filters, int atoms) +{ + fprintf(stderr,"implicit Layer: %d x %d \t mean=%.2f, std=%.2f \n", filters, atoms, mean_init, std_init); + layer l = { (LAYER_TYPE)0 }; + l.type = IMPLICIT; + l.batch = batch; + l.w = 1; + l.h = 1; + l.c = 1; + + l.out_w = 1; + l.out_h = atoms; + l.out_c = filters; + + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = 1; + l.index = index; + + l.nweights = l.out_w * l.out_h * l.out_c; + + l.weight_updates = (float*)xcalloc(l.nweights, sizeof(float)); + l.weights = (float*)xcalloc(l.nweights, sizeof(float)); + int i; + for (i = 0; i < l.nweights; ++i) l.weights[i] = mean_init + rand_uniform(-std_init, std_init); + + + l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float)); + l.output = (float*)xcalloc(l.outputs * batch, sizeof(float)); + + l.forward = forward_implicit_layer; + l.backward = backward_implicit_layer; + l.update = update_implicit_layer; +#ifdef GPU + l.forward_gpu = forward_implicit_layer_gpu; + l.backward_gpu = backward_implicit_layer_gpu; + l.update_gpu = update_implicit_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + l.weights_gpu = cuda_make_array(l.weights, l.nweights); +#endif + return l; +} + +void resize_implicit_layer(layer *l, int w, int h) +{ +} + +void forward_implicit_layer(const layer l, network_state state) +{ + int i; + #pragma omp parallel for + for (i = 0; i < l.nweights * l.batch; ++i) { + l.output[i] = l.weights[i % l.nweights]; + } +} + +void backward_implicit_layer(const layer l, network_state state) +{ + int i; + #pragma omp parallel for + for (i = 0; i < l.nweights * l.batch; ++i) { + l.weight_updates[i % l.nweights] += l.delta[i]; + } +} + +void update_implicit_layer(layer l, int 
batch, float learning_rate_init, float momentum, float decay) +{ + float learning_rate = learning_rate_init*l.learning_rate_scale; + //float momentum = a.momentum; + //float decay = a.decay; + //int batch = a.batch; + + axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.nweights, learning_rate / batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.nweights, momentum, l.weight_updates, 1); + +} + + +#ifdef GPU +void forward_implicit_layer_gpu(const layer l, network_state state) +{ + forward_implicit_gpu(l.batch, l.nweights, l.weights_gpu, l.output_gpu); +} + +void backward_implicit_layer_gpu(const layer l, network_state state) +{ + backward_implicit_gpu(l.batch, l.nweights, l.weight_updates_gpu, l.delta_gpu); +} + +void update_implicit_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale) +{ + // Loss scale for Mixed-Precision on Tensor-Cores + float learning_rate = learning_rate_init*l.learning_rate_scale / loss_scale; + //float momentum = a.momentum; + //float decay = a.decay; + //int batch = a.batch; + + reset_nan_and_inf(l.weight_updates_gpu, l.nweights); + fix_nan_and_inf(l.weights_gpu, l.nweights); + + if (l.adam) { + //adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.nweights, batch, l.t); + } + else { + //axpy_ongpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + //axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + //scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_ongpu(l.nweights, -decay*batch*loss_scale, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + + scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); + } + + if (l.clip) { + constrain_ongpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + +void pull_implicit_layer(layer l) +{ + cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); + cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); + + if (l.adam) { + cuda_pull_array_async(l.m_gpu, l.m, l.nweights); + cuda_pull_array_async(l.v_gpu, l.v, l.nweights); + } + CHECK_CUDA(cudaPeekAtLastError()); + cudaStreamSynchronize(get_cuda_stream()); +} + +void push_implicit_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + + if (l.train) { + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + } + if (l.adam) { + cuda_push_array(l.m_gpu, l.m, l.nweights); + cuda_push_array(l.v_gpu, l.v, l.nweights); + } + CHECK_CUDA(cudaPeekAtLastError()); +} +#endif + + diff --git a/src/representation_layer.h b/src/representation_layer.h new file mode 100644 index 00000000000..8b2a9da3082 --- /dev/null +++ b/src/representation_layer.h @@ -0,0 +1,29 @@ +#ifndef REPRESENTATION_LAYER_H +#define REPRESENTATION_LAYER_H + +#include "layer.h" +#include "network.h" + +#ifdef __cplusplus +extern "C" { +#endif +layer make_implicit_layer(int batch, int index, float mean_init, float std_init, int filters, int atoms); +void forward_implicit_layer(const layer l, network_state state); +void backward_implicit_layer(const layer l, network_state state); +void update_implicit_layer(layer l, int batch, float learning_rate_init, float momentum, float decay); + +void resize_implicit_layer(layer 
*l, int w, int h); + +#ifdef GPU +void forward_implicit_layer_gpu(const layer l, network_state state); +void backward_implicit_layer_gpu(const layer l, network_state state); + +void update_implicit_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale); +void pull_implicit_layer(layer l); +void push_implicit_layer(layer l); +#endif + +#ifdef __cplusplus +} +#endif +#endif // REPRESENTATION_LAYER_H From 432623aa715393125196dbf94a964c1519b1739e Mon Sep 17 00:00:00 2001 From: Double Date: Wed, 12 May 2021 04:11:20 +0800 Subject: [PATCH 13/46] fix: frame delay (#7659) * fix: frame delay when use low performance device such as raspberry pi or jetson nano execute darknet demo, the streaming will be delayed more and more * fix: operate only in live stream Co-authored-by: Double.c --- src/demo.c | 5 +++++ src/image_opencv.cpp | 9 +++++++++ src/image_opencv.h | 2 +- src/utils.c | 5 +++++ src/utils.h | 2 ++ 5 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/demo.c b/src/demo.c index d503aa7fcfc..ad1cc6e041c 100644 --- a/src/demo.c +++ b/src/demo.c @@ -36,6 +36,7 @@ static float demo_thresh = 0; static int demo_ext_output = 0; static long long int frame_id = 0; static int demo_json_port = -1; +static bool demo_skip_frame = false; static int avg_frames; @@ -59,6 +60,8 @@ void *fetch_in_thread(void *ptr) while (!custom_atomic_load_int(&flag_exit)) { while (!custom_atomic_load_int(&run_fetch_in_thread)) { if (custom_atomic_load_int(&flag_exit)) return 0; + if (demo_skip_frame) + consume_frame(cap); this_thread_yield(); } int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream @@ -168,9 +171,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if(filename){ printf("video file: %s\n", filename); cap = get_capture_video_stream(filename); + demo_skip_frame = is_live_stream(filename); }else{ printf("Webcam index: %d\n", cam_index); cap = get_capture_webcam(cam_index); + demo_skip_frame = true; } if (!cap) { diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index add27c96ef0..f9615cb00ae 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -834,6 +834,15 @@ extern "C" image get_image_from_stream_letterbox(cap_cv *cap, int w, int h, int } // ---------------------------------------- +extern "C" void consume_frame(cap_cv *cap){ + cv::Mat *src = NULL; + src = (cv::Mat *)get_capture_frame_cv(cap); + if (src) + delete src; +} +// ---------------------------------------- + + // ==================================================================== // Image Saving // ==================================================================== diff --git a/src/image_opencv.h b/src/image_opencv.h index 6fa6cb5c6b0..3aeb4478390 100644 --- a/src/image_opencv.h +++ b/src/image_opencv.h @@ -83,7 +83,7 @@ int set_capture_position_frame_cv(cap_cv *cap, int index); image get_image_from_stream_cpp(cap_cv *cap); image get_image_from_stream_resize(cap_cv *cap, int w, int h, int c, mat_cv** in_img, int dont_close); image get_image_from_stream_letterbox(cap_cv *cap, int w, int h, int c, mat_cv** in_img, int dont_close); - +void consume_frame(cap_cv *cap); // Image Saving void save_cv_png(mat_cv *img, const char *name); diff --git a/src/utils.c b/src/utils.c index fe5c2062148..e4a2298e762 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1047,3 +1047,8 @@ unsigned long custom_hash(char *str) return hash; } + +bool is_live_stream(const char * path){ + const char *url_schema = "://"; + 
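+    // heuristic: any path containing the URL schema separator "://"
+    // (e.g. rtsp://, http://) is treated as a live stream, so the demo
+    // skips stale frames only for real-time sources, never for local files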
+    return (NULL != strstr(path, url_schema));
+}
diff --git a/src/utils.h b/src/utils.h
index 9a154ea6267..a217b425a2e 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -5,6 +5,7 @@
 #include 
 #include 
+#include <stdbool.h>
 
 #ifndef M_PI
 #define M_PI 3.14159265358979323846 // pi
@@ -95,6 +96,7 @@ int max_int_index(int *a, int n);
 boxabs box_to_boxabs(const box* b, const int img_w, const int img_h, const int bounds_check);
 int make_directory(char *path, int mode);
 unsigned long custom_hash(char *str);
+bool is_live_stream(const char * path);
 
 #define max_val_cmp(a,b) (((a) > (b)) ? (a) : (b))
 #define min_val_cmp(a,b) (((a) < (b)) ? (a) : (b))

From 81b768bae0017d386e95247d47ff14898b055308 Mon Sep 17 00:00:00 2001
From: AlexeyAB 
Date: Wed, 12 May 2021 04:16:19 +0300
Subject: [PATCH 14/46] Fix [implicit] layer

---
 src/blas_kernels.cu        | 6 ++++--
 src/representation_layer.c | 1 -
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index 21dbfbba575..85c55adfb82 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -2460,12 +2460,14 @@ __global__ void backward_implicit_kernel(int size, int batch, int nweights, floa
     const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
     if (id >= size) return;
 
-    weight_updates_gpu[id % nweights] += delta_gpu[id];
+    for (int i = 0; i < batch; ++i) {
+        weight_updates_gpu[id] += delta_gpu[id + i * nweights];
+    }
 }
 
 extern "C" void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float *delta_gpu)
 {
-    int size = batch * nweights;
+    int size = nweights;
     backward_implicit_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, batch, nweights, weight_updates_gpu, delta_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
diff --git a/src/representation_layer.c b/src/representation_layer.c
index a6cc5c5f3f8..fe7741fa9db 100644
--- a/src/representation_layer.c
+++ b/src/representation_layer.c
@@ -67,7 +67,6 @@ void forward_implicit_layer(const layer l, network_state state)
 void backward_implicit_layer(const layer l, network_state state)
 {
     int i;
-    #pragma omp parallel for
     for (i = 0; i < l.nweights * l.batch; ++i) {
         l.weight_updates[i % l.nweights] += l.delta[i];
     }

From 179be4dcb4a70b74859df084cd5b590c9d9a805e Mon Sep 17 00:00:00 2001
From: Stefano Sinigardi 
Date: Thu, 13 May 2021 22:31:59 +0200
Subject: [PATCH 15/46] add support for Windows PowerShell (#7702)

* add support for powershell v5
* revert opencv enabled on osx
* improve error handling in initial stages
* intercept failures also when updating darknet sources themselves
* press a key before exiting script, to preserve console host window if not running in a shell
* check path length
---
 build.ps1 | 139 +++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 117 insertions(+), 22 deletions(-)

diff --git a/build.ps1 b/build.ps1
index 8cf090f0a12..07fb47aafa1 100755
--- a/build.ps1
+++ b/build.ps1
@@ -14,9 +14,70 @@ param (
   [switch]$DoNotUseNinja = $false,
   [switch]$ForceCPP = $false,
   [switch]$ForceStaticLib = $false,
+  [switch]$ForceSetupVS = $false,
   [switch]$ForceGCC8 = $false
 )
 
+Function MyThrow ($Message) {
+  if ($DisableInteractive) {
+    Throw $Message
+  }
+  else {
+    # Check if running in PowerShell ISE
+    if ($psISE) {
+      # "ReadKey" not supported in PowerShell ISE.
+ # Show MessageBox UI + $Shell = New-Object -ComObject "WScript.Shell" + $Shell.Popup($Message, 0, "OK", 0) + return + } + + $Ignore = + 16, # Shift (left or right) + 17, # Ctrl (left or right) + 18, # Alt (left or right) + 20, # Caps lock + 91, # Windows key (left) + 92, # Windows key (right) + 93, # Menu key + 144, # Num lock + 145, # Scroll lock + 166, # Back + 167, # Forward + 168, # Refresh + 169, # Stop + 170, # Search + 171, # Favorites + 172, # Start/Home + 173, # Mute + 174, # Volume Down + 175, # Volume Up + 176, # Next Track + 177, # Previous Track + 178, # Stop Media + 179, # Play + 180, # Mail + 181, # Select Media + 182, # Application 1 + 183 # Application 2 + + Write-Host $Message + Write-Host -NoNewline "Press any key to continue..." + while ($null -eq $KeyInfo.VirtualKeyCode -or $Ignore -contains $KeyInfo.VirtualKeyCode) { + $KeyInfo = $Host.UI.RawUI.ReadKey("NoEcho, IncludeKeyDown") + } + exit + } +} + +if ($PSVersionTable.PSVersion.Major -eq 5) { + $IsWindowsPowerShell = $true +} + +if ($PSVersionTable.PSVersion.Major -lt 5) { + MyThrow("Your PowerShell version is too old, please update it.") +} + if (-Not $DisableInteractive -and -Not $UseVCPKG) { $Result = Read-Host "Enable vcpkg to install darknet dependencies (yes/no)" if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { @@ -51,12 +112,12 @@ $number_of_build_workers = 8 if ($IsLinux -or $IsMacOS) { $bootstrap_ext = ".sh" } -elseif ($IsWindows) { +elseif ($IsWindows -or $IsWindowsPowerShell) { $bootstrap_ext = ".bat" } Write-Host "Native shell script extension: ${bootstrap_ext}" -if (-Not $IsWindows) { +if (-Not $IsWindows -and -not $IsWindowsPowerShell -and -Not $ForceSetupVS) { $DoNotSetupVS = $true } @@ -71,7 +132,7 @@ if ($IsLinux -and $ForceGCC8) { $env:CXX = "g++-8" } -if ($IsWindows -and -Not $env:VCPKG_DEFAULT_TRIPLET) { +if (($IsWindows -or $IsWindowsPowerShell) -and -Not $env:VCPKG_DEFAULT_TRIPLET) { $env:VCPKG_DEFAULT_TRIPLET = "x64-windows" } @@ -158,19 +219,24 @@ Push-Location $PSScriptRoot $GIT_EXE = Get-Command git 2> $null | Select-Object -ExpandProperty Definition if (-Not $GIT_EXE) { - throw "Could not find git, please install it" + MyThrow("Could not find git, please install it") } else { Write-Host "Using git from ${GIT_EXE}" } if ((Test-Path "$PSScriptRoot/.git") -and -not $DoNotUpdateDARKNET) { - & $GIT_EXE pull + $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-not $exitCode -eq 0) { + MyThrow("Updating darknet sources failed! 
Exited with $exitCode.") + } } $CMAKE_EXE = Get-Command cmake 2> $null | Select-Object -ExpandProperty Definition if (-Not $CMAKE_EXE) { - throw "Could not find CMake, please install it" + MyThrow("Could not find CMake, please install it") } else { Write-Host "Using CMake from ${CMAKE_EXE}" @@ -195,7 +261,7 @@ function getProgramFiles32bit() { } if ($null -eq $out) { - throw "Could not find [Program Files 32-bit]" + MyThrow("Could not find [Program Files 32-bit]") } return $out @@ -219,11 +285,11 @@ function getLatestVisualStudioWithDesktopWorkloadPath() { } } if (!$installationPath) { - Throw "Could not locate any installation of Visual Studio" + MyThrow("Could not locate any installation of Visual Studio") } } else { - Throw "Could not locate vswhere at $vswhereExe" + MyThrow("Could not locate vswhere at $vswhereExe") } return $installationPath } @@ -247,11 +313,11 @@ function getLatestVisualStudioWithDesktopWorkloadVersion() { } } if (!$installationVersion) { - Throw "Could not locate any installation of Visual Studio" + MyThrow("Could not locate any installation of Visual Studio") } } else { - Throw "Could not locate vswhere at $vswhereExe" + MyThrow("Could not locate vswhere at $vswhereExe") } return $installationVersion } @@ -268,15 +334,22 @@ elseif ((Test-Path "${env:WORKSPACE}/vcpkg") -and $UseVCPKG) { Write-Host "Found vcpkg in WORKSPACE/vcpkg: $vcpkg_path" $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } -elseif ((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { - $vcpkg_path = "${RUNVCPKG_VCPKG_ROOT_OUT}" - $env:VCPKG_ROOT = "${RUNVCPKG_VCPKG_ROOT_OUT}" - Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${RUNVCPKG_VCPKG_ROOT_OUT}" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" +elseif (-not($null -eq ${RUNVCPKG_VCPKG_ROOT_OUT})) { + if((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { + $vcpkg_path = "${RUNVCPKG_VCPKG_ROOT_OUT}" + $env:VCPKG_ROOT = "${RUNVCPKG_VCPKG_ROOT_OUT}" + Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${vcpkg_path}" + $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + } } elseif ($UseVCPKG) { if (-Not (Test-Path "$PWD/vcpkg")) { - & $GIT_EXE clone https://github.com/microsoft/vcpkg + $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "clone https://github.com/microsoft/vcpkg" + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-not $exitCode -eq 0) { + MyThrow("Cloning vcpkg sources failed! Exited with $exitCode.") + } } $vcpkg_path = "$PWD/vcpkg" $env:VCPKG_ROOT = "$PWD/vcpkg" @@ -290,11 +363,33 @@ else { if ($UseVCPKG -and (Test-Path "$vcpkg_path/.git") -and -not $DoNotUpdateVCPKG) { Push-Location $vcpkg_path - & $GIT_EXE pull - & $PWD/bootstrap-vcpkg${bootstrap_ext} -disableMetrics + $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-not $exitCode -eq 0) { + MyThrow("Updating vcpkg sources failed! Exited with $exitCode.") + } + $proc = Start-Process -NoNewWindow -PassThru -FilePath $PWD/bootstrap-vcpkg${bootstrap_ext} -ArgumentList "-disableMetrics" + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-not $exitCode -eq 0) { + MyThrow("Bootstrapping vcpkg failed! Exited with $exitCode.") + } Pop-Location } +if ($UseVCPKG -and ($vcpkg_path.length -gt 40) -and ($IsWindows -or $IsWindowsPowerShell)) { + Write-Host "vcpkg path is very long and might fail. 
Please move it or" -ForegroundColor Yellow + Write-Host "the entire darknet folder to a shorter path, like C:\darknet" -ForegroundColor Yellow + Write-Host "You can use the subst command to ease the process if necessary" -ForegroundColor Yellow + if (-Not $DisableInteractive) { + $Result = Read-Host "Do you still want to continue? (yes/no)" + if ($Result -eq 'No' -or $Result -eq 'N' -or $Result -eq 'no' -or $Result -eq 'n') { + MyThrow("Build aborted") + } + } +} + if (-Not $DoNotSetupVS) { if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) { $vsfound = getLatestVisualStudioWithDesktopWorkloadPath @@ -328,7 +423,7 @@ if (-Not $DoNotSetupVS) { $additional_build_setup = $additional_build_setup + " -T `"host=x64`" -A `"x64`"" } else { - throw "Unknown Visual Studio version, unsupported configuration" + MyThrow("Unknown Visual Studio version, unsupported configuration") } } if (-Not $UseVCPKG) { @@ -397,13 +492,13 @@ $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList $proc.WaitForExit() $exitCode = $proc.ExitCode if (-not $exitCode -eq 0) { - Throw "Config failed! Exited with $exitCode." + MyThrow("Config failed! Exited with $exitCode.") } $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${number_of_build_workers} --target install" $proc.WaitForExit() $exitCode = $proc.ExitCode if (-not $exitCode -eq 0) { - Throw "Config failed! Exited with $exitCode." + MyThrow("Config failed! Exited with $exitCode.") } Remove-Item DarknetConfig.cmake Remove-Item DarknetConfigVersion.cmake From 0024a6a3c8dd029dac2211277ca0f9b633872cfe Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Sun, 16 May 2021 21:16:49 +0200 Subject: [PATCH 16/46] [build.ps1] improvements (#7711) * use parentheses to make clear operator order, print out versions * add english language pack note in README.md * add even more build info --- CMakeLists.txt | 2 + README.md | 95 +++++++++++++++++------------------ build.ps1 | 133 +++++++++++++++++++++++++++++++------------------ 3 files changed, 132 insertions(+), 98 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0029abe78ee..49df87cf3a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,8 @@ set(Darknet_PATCH_VERSION 5) set(Darknet_TWEAK_VERSION 4) set(Darknet_VERSION ${Darknet_MAJOR_VERSION}.${Darknet_MINOR_VERSION}.${Darknet_PATCH_VERSION}.${Darknet_TWEAK_VERSION}) +message("Darknet_VERSION: ${Darknet_VERSION}") + option(CMAKE_VERBOSE_MAKEFILE "Create verbose makefile" ON) option(CUDA_VERBOSE_BUILD "Create verbose CUDA build" ON) option(BUILD_SHARED_LIBS "Create dark as a shared library" ON) diff --git a/README.md b/README.md index 4a4a67f49f8..acd2506d16c 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,18 @@ Paper YOLO v4: https://arxiv.org/abs/2004.10934 Paper Scaled YOLO v4: https://arxiv.org/abs/2011.08036 use to reproduce results: [ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4) -More details in articles on medium: - * [Scaled_YOLOv4](https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8) - * [YOLOv4](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) +More details in articles on medium: + +* 
[Scaled_YOLOv4](https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8) +* [YOLOv4](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) Manual: https://github.com/AlexeyAB/darknet/wiki -Discussion: - - [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) - - [Google-groups](https://groups.google.com/forum/#!forum/darknet) - - [Discord](https://discord.gg/zSq8rtW) +Discussion: + +* [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) +* [Google-groups](https://groups.google.com/forum/#!forum/darknet) +* [Discord](https://discord.gg/zSq8rtW) About Darknet framework: http://pjreddie.com/darknet/ @@ -29,7 +31,6 @@ About Darknet framework: http://pjreddie.com/darknet/ [![colab](https://user-images.githubusercontent.com/4096485/86174089-b2709f80-bb29-11ea-9faf-3d8dc668a1a5.png)](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) [![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg) - * [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) * [Requirements (and how to install dependencies)](#requirements) * [Pre-trained models](#pre-trained-models) @@ -40,7 +41,7 @@ About Darknet framework: http://pjreddie.com/darknet/ - [Yolo v4, v3 and v2 for Windows and Linux](#yolo-v4-v3-and-v2-for-windows-and-linux) - [(neural networks for object detection)](#neural-networks-for-object-detection) - - [GeForce RTX 2080 Ti:](#geforce-rtx-2080-ti) + - [GeForce RTX 2080 Ti](#geforce-rtx-2080-ti) - [Youtube video of results](#youtube-video-of-results) - [How to evaluate AP of YOLOv4 on the MS COCO evaluation server](#how-to-evaluate-ap-of-yolov4-on-the-ms-coco-evaluation-server) - [How to evaluate FPS of YOLOv4 on GPU](#how-to-evaluate-fps-of-yolov4-on-gpu) @@ -61,9 +62,9 @@ About Darknet framework: http://pjreddie.com/darknet/ - [How to train (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) - [How to train tiny-yolo (to detect your custom objects):](#how-to-train-tiny-yolo-to-detect-your-custom-objects) - [When should I stop training:](#when-should-i-stop-training) - - [Custom object detection:](#custom-object-detection) - - [How to improve object detection:](#how-to-improve-object-detection) - - [How to mark bounded boxes of objects and create annotation files:](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) + - [Custom object detection](#custom-object-detection) + - [How to improve object detection](#how-to-improve-object-detection) + - [How to mark bounded boxes of objects and create annotation files](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) - [How to use Yolo as DLL and SO libraries](#how-to-use-yolo-as-dll-and-so-libraries) ![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) @@ -74,20 +75,21 @@ About Darknet framework: http://pjreddie.com/darknet/ ![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 - tkDNN-TensorRT accelerates YOLOv4 **~2x** times 
for batch=1 and **3x-4x** times for batch=4. + * tkDNN: https://github.com/ceccocats/tkDNN * OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf -#### GeForce RTX 2080 Ti: -| Network Size | Darknet, FPS (avg)| tkDNN TensorRT FP32, FPS | tkDNN TensorRT FP16, FPS | OpenCV FP16, FPS | tkDNN TensorRT FP16 batch=4, FPS | OpenCV FP16 batch=4, FPS | tkDNN Speedup | -|:-----:|:--------:|--------:|--------:|--------:|--------:|--------:|------:| -|320 | 100 | 116 | **202** | 183 | 423 | **430** | **4.3x** | -|416 | 82 | 103 | **162** | 159 | 284 | **294** | **3.6x** | -|512 | 69 | 91 | 134 | **138** | 206 | **216** | **3.1x** | -|608 | 53 | 62 | 103 | **115**| 150 | **150** | **2.8x** | -|Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | -|Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | +### GeForce RTX 2080 Ti + +| Network Size | Darknet, FPS (avg) | tkDNN TensorRT FP32, FPS | tkDNN TensorRT FP16, FPS | OpenCV FP16, FPS | tkDNN TensorRT FP16 batch=4, FPS | OpenCV FP16 batch=4, FPS | tkDNN Speedup | +|:--------------------------:|:------------------:|-------------------------:|-------------------------:|-----------------:|---------------------------------:|-------------------------:|--------------:| +|320 | 100 | 116 | **202** | 183 | 423 | **430** | **4.3x** | +|416 | 82 | 103 | **162** | 159 | 284 | **294** | **3.6x** | +|512 | 69 | 91 | 134 | **138** | 206 | **216** | **3.1x** | +|608 | 53 | 62 | 103 | **115** | 150 | **150** | **2.8x** | +|Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | +|Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | * Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) * Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) @@ -144,15 +146,15 @@ There are weights-file for different cfg-files (trained for MS COCO dataset): FPS on RTX 2070 (R) and Tesla V100 (V): * [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - 640x640 - **67.9% mAP@0.5 (49.4% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights) - * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166 + * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166 * [yolov4-csp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg) - 202 MB: [yolov4-csp.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights) paper [Scaled Yolo v4](https://arxiv.org/abs/2011.08036) just change `width=` and `height=` parameters in `yolov4-csp.cfg` file and use the same `yolov4-csp.weights` file for all cases: * `width=640 height=640` in cfg: **66.2% mAP@0.5 (47.5% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops * `width=512 height=512` in cfg: **64.8% mAP@0.5 (46.2% AP@0.5:0.95) - 93(V) FPS** - 77 (39 FMA) BFlops - * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142 - + * pre-trained weights for training: 
https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142 + * [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) just change `width=` and `height=` parameters in `yolov4.cfg` file and use the same `yolov4.weights` file for all cases: * `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops @@ -213,11 +215,11 @@ You can get cfg-files by path: `darknet/cfg/` For YOLOv4 - convert `yolov4.weights`/`cfg` files to `yolov4.pb` by using [TNTWEN](https://github.com/TNTWEN/OpenVINO-YOLOV4) project, and to `yolov4.tflite` [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) * **OpenCV** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) * **Intel OpenVINO 2021.2:** supports YOLOv4 (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): https://devmesh.intel.com/projects/openvino-yolov4-49c756 read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) (for [Scaled-YOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-large) models use https://github.com/Chen-MingChang/pytorch_YOLO_OpenVINO_demo ) -* **PyTorch > ONNX**: - * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) - * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) - * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) - * [YOLOv5](https://github.com/ultralytics/yolov5) +* **PyTorch > ONNX**: + * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) + * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) + * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) + * [YOLOv5](https://github.com/ultralytics/yolov5) * **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ * **TensorRT+tkDNN**: https://github.com/ceccocats/tkDNN#fps-results * **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo read [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) and [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). Additionally [jkjung-avt/tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos) or [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) @@ -392,7 +394,7 @@ In Windows: This is the recommended approach to build Darknet on Windows. -1. Install Visual Studio 2017 or 2019. 
In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com)
+1. Install Visual Studio 2017 or 2019. In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com). Remember to install the English language pack; it is mandatory for vcpkg!
 
 2. Install CUDA (at least v10.0) enabling VS Integration during installation.
 
@@ -423,7 +425,6 @@ https://groups.google.com/d/msg/darknet/NbJqonJBTSY/Te5PfIpuCAAJ
 Training Yolo v4 (and v3):
 
 0. For training `cfg/yolov4-custom.cfg` download the pre-trained weights-file (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) )
-
 1. Create file `yolo-obj.cfg` with the same content as in `yolov4-custom.cfg` (or copy `yolov4-custom.cfg` to `yolo-obj.cfg`) and:
 
   * change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3)
@@ -445,9 +446,8 @@ Training Yolo v4 (and v3):
   * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789
 
   So if `classes=1` then it should be `filters=18`. If `classes=2` then write `filters=21`.
-  **(Do not write in the cfg-file: filters=(classes + 5)x3)**
-
+
   (Generally `filters` depends on the `classes`, `coords` and the number of `mask`s, i.e. filters=`(classes + coords + 1)*<number of mask>`, where `mask` is indices of anchors. If `mask` is absent, then filters=`(classes + coords + 1)*num`)
 
   So for example, for 2 objects, your file `yolo-obj.cfg` should differ from `yolov4-custom.cfg` in such lines in each of **3** [yolo]-layers:
 
@@ -461,7 +461,6 @@ classes=2
 ```
 
 2. Create file `obj.names` in the directory `build\darknet\x64\data\`, with object names - each on a new line
-
 3. Create file `obj.data` in the directory `build\darknet\x64\data\`, containing (where **classes = number of objects**):
 
 ```ini
   classes = 2
   train  = data/train.txt
   valid  = data/test.txt
   names = data/obj.names
   backup = backup/
 ```
 
 4. Put image-files (.jpg) of your objects in the directory `build\darknet\x64\data\obj\`
-
 5. You should label each object on images from your dataset. Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark
 
 It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line:
@@ -509,12 +507,11 @@ It will create `.txt`-file for each `.jpg`-image-file - in the same directory an
  * for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74)
  * for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): [yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing)
  * for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing)
-
 8. Start training by using the command line: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137`
-
+
    To train on Linux use the command: `./darknet detector train data/obj.data yolo-obj.cfg yolov4.conv.137` (just use `./darknet` instead of `darknet.exe`)
-
+
    * (file `yolo-obj_last.weights` will be saved to the `build\darknet\x64\backup\` every 100 iterations)
    * (file `yolo-obj_xxxx.weights` will be saved to the `build\darknet\x64\backup\` every 1000 iterations)
    * (to disable the Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on a computer without a monitor, e.g. a cloud Amazon EC2 instance)
@@ -529,18 +526,19 @@ It will create `.txt`-file for each `.jpg`-image-file - in the same directory an
 
    (in the original repository https://github.com/pjreddie/darknet the weights-file is saved only once every 10 000 iterations `if(iterations > 1000)`)
 
  * Also you can get the result earlier than after all 45000 iterations.
-
+
  **Note:** If during training you see `nan` values in the `avg` (loss) field - then training goes wrong, but if `nan` is in some other lines - then training goes well.
-
+
  **Note:** If you changed width= or height= in your cfg-file, then the new width and height must be divisible by 32.
-
+
 **Note:** After training use such command for detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights`
-
+
 **Note:** if the error `Out of memory` occurs then in the `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4)
-
+
 ### How to train tiny-yolo (to detect your custom objects):
 
 Do all the same steps as for the full yolo model as described above. With the exception of:
+
 * Download the file with the first 29 convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29
 (Or get this file from the yolov4-tiny.weights file by using the command: `darknet.exe partial cfg/yolov4-tiny-custom.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29`)
 * Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg`
@@ -548,7 +546,7 @@ Do all the same steps as for the full yolo model as described above. With the ex
 For training Yolo based on other models ([DenseNet201-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/densenet201_yolo.cfg) or [ResNet50-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/resnet50_yolo.cfg)), you can download and get pre-trained weights as shown in this file: https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/partial.cmd
 If you made your custom model that isn't based on other models, then you can train it without pre-trained weights; random initial weights will be used.
-
+
 ## When should I stop training:
 
 Usually 2000 iterations are sufficient for each class (object), but not less than the number of training images and not less than 6000 iterations in total.
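As a rough sketch, this rule can be computed explicitly before you pick `max_batches` in your cfg-file. A minimal PowerShell example (the class and image counts below are placeholder assumptions, not values from this repository):

```powershell
# Minimum recommended training iterations, per the rule above:
# at least 2000 per class, not less than the number of training images,
# and never fewer than 6000 in total.
$classes       = 2      # assumption: number of classes in your obj.names
$trainImages   = 4500   # assumption: number of images listed in train.txt
$minIterations = [Math]::Max([Math]::Max(2000 * $classes, $trainImages), 6000)
Write-Host "Train for at least $minIterations iterations (max_batches >= $minIterations)"
```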
But for a more precise definition when you should stop training, use the following manual: @@ -613,15 +611,14 @@ In terms of Wiki, indicators Precision and Recall have a slightly different mean ![precision_recall_iou](https://hsto.org/files/ca8/866/d76/ca8866d76fb840228940dbf442a7f06a.jpg) - -### Custom object detection: +### Custom object detection Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` | ![Yolo_v2_training](https://hsto.org/files/d12/1e7/515/d121e7515f6a4eb694913f10de5f2b61.jpg) | ![Yolo_v2_training](https://hsto.org/files/727/c7e/5e9/727c7e5e99bf4d4aa34027bb6a5e4bab.jpg) | |---|---| -## How to improve object detection: +## How to improve object detection 1. Before training: @@ -680,7 +677,7 @@ then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file. But * to get even greater accuracy you should train with higher resolution 608x608 or 832x832, note: if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) -## How to mark bounded boxes of objects and create annotation files: +## How to mark bounded boxes of objects and create annotation files Here you can find repository with GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 - v4: https://github.com/AlexeyAB/Yolo_mark diff --git a/build.ps1 b/build.ps1 index 07fb47aafa1..6c7d297e9a1 100755 --- a/build.ps1 +++ b/build.ps1 @@ -1,5 +1,6 @@ #!/usr/bin/env pwsh + param ( [switch]$DisableInteractive = $false, [switch]$EnableCUDA = $false, @@ -15,12 +16,14 @@ param ( [switch]$ForceCPP = $false, [switch]$ForceStaticLib = $false, [switch]$ForceSetupVS = $false, - [switch]$ForceGCC8 = $false + [Int32]$ForceGCCVersion = 0 ) +$build_ps1_version = "0.9" Function MyThrow ($Message) { if ($DisableInteractive) { - Throw $Message + Write-Host $Message -ForegroundColor Red + exit } else { # Check if running in PowerShell ISE @@ -61,15 +64,19 @@ Function MyThrow ($Message) { 182, # Application 1 183 # Application 2 - Write-Host $Message + Write-Host $Message -ForegroundColor Red Write-Host -NoNewline "Press any key to continue..." 
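        # Note on the loop that follows: it re-reads keyboard input until the user
        # presses a key whose VirtualKeyCode is not in the $Ignore list above, so
        # the error message stays visible until it is explicitly acknowledged.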
- while ($null -eq $KeyInfo.VirtualKeyCode -or $Ignore -contains $KeyInfo.VirtualKeyCode) { + while (($null -eq $KeyInfo.VirtualKeyCode) -or ($Ignore -contains $KeyInfo.VirtualKeyCode)) { $KeyInfo = $Host.UI.RawUI.ReadKey("NoEcho, IncludeKeyDown") } exit } } +Write-Host "Darknet build script version ${build_ps1_version}" +Write-Host -NoNewLine "PowerShell version:" +$PSVersionTable.PSVersion + if ($PSVersionTable.PSVersion.Major -eq 5) { $IsWindowsPowerShell = $true } @@ -78,30 +85,30 @@ if ($PSVersionTable.PSVersion.Major -lt 5) { MyThrow("Your PowerShell version is too old, please update it.") } -if (-Not $DisableInteractive -and -Not $UseVCPKG) { +if ((-Not $DisableInteractive) -and (-Not $UseVCPKG)) { $Result = Read-Host "Enable vcpkg to install darknet dependencies (yes/no)" - if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + if (($Result -eq 'Yes') -or ($Result -eq 'Y') -or ($Result -eq 'yes') -or ($Result -eq 'y')) { $UseVCPKG = $true } } -if (-Not $DisableInteractive -and -Not $EnableCUDA -and -Not $IsMacOS) { +if ((-Not $DisableInteractive) -and (-Not $EnableCUDA) -and (-Not $IsMacOS)) { $Result = Read-Host "Enable CUDA integration (yes/no)" - if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + if (($Result -eq 'Yes') -or ($Result -eq 'Y') -or ($Result -eq 'yes') -or ($Result -eq 'y')) { $EnableCUDA = $true } } -if ($EnableCUDA -and -Not $DisableInteractive -and -Not $EnableCUDNN) { +if ($EnableCUDA -and (-Not $DisableInteractive) -and (-Not $EnableCUDNN)) { $Result = Read-Host "Enable CUDNN optional dependency (yes/no)" - if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + if (($Result -eq 'Yes') -or ($Result -eq 'Y') -or ($Result -eq 'yes') -or ($Result -eq 'y')) { $EnableCUDNN = $true } } -if (-Not $DisableInteractive -and -Not $EnableOPENCV) { +if ((-Not $DisableInteractive) -and (-Not $EnableOPENCV)) { $Result = Read-Host "Enable OpenCV optional dependency (yes/no)" - if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + if (($Result -eq 'Yes') -or ($Result -eq 'Y') -or ($Result -eq 'yes') -or ($Result -eq 'y')) { $EnableOPENCV = $true } } @@ -115,9 +122,11 @@ if ($IsLinux -or $IsMacOS) { elseif ($IsWindows -or $IsWindowsPowerShell) { $bootstrap_ext = ".bat" } -Write-Host "Native shell script extension: ${bootstrap_ext}" +if ($UseVCPKG) { + Write-Host "vcpkg bootstrap script: bootstrap-vcpkg${bootstrap_ext}" +} -if (-Not $IsWindows -and -not $IsWindowsPowerShell -and -Not $ForceSetupVS) { +if ((-Not $IsWindows) -and (-Not $IsWindowsPowerShell) -and (-Not $ForceSetupVS)) { $DoNotSetupVS = $true } @@ -126,13 +135,13 @@ if ($ForceStaticLib) { $additional_build_setup = " -DBUILD_SHARED_LIBS=OFF " } -if ($IsLinux -and $ForceGCC8) { - Write-Host "Manually setting CC and CXX variables to gcc-8 and g++-8" - $env:CC = "gcc-8" - $env:CXX = "g++-8" +if (($IsLinux -or $IsMacOS) -and ($ForceGCCVersion -gt 0)) { + Write-Host "Manually setting CC and CXX variables to gcc version $ForceGCCVersion" + $env:CC = "gcc-$ForceGCCVersion" + $env:CXX = "g++-$ForceGCCVersion" } -if (($IsWindows -or $IsWindowsPowerShell) -and -Not $env:VCPKG_DEFAULT_TRIPLET) { +if (($IsWindows -or $IsWindowsPowerShell) -and (-Not $env:VCPKG_DEFAULT_TRIPLET)) { $env:VCPKG_DEFAULT_TRIPLET = "x64-windows" } @@ -165,18 +174,18 @@ else { Write-Host "OPENCV is disabled, please pass -EnableOPENCV to the script to enable" } -if ($EnableCUDA -and $EnableOPENCV -and -not 
$EnableOPENCV_CUDA) { +if ($EnableCUDA -and $EnableOPENCV -and (-Not $EnableOPENCV_CUDA)) { Write-Host "OPENCV with CUDA extension is not enabled, you can enable it passing -EnableOPENCV_CUDA" } -elseif ($EnableOPENCV -and $EnableOPENCV_CUDA -and -not $EnableCUDA) { +elseif ($EnableOPENCV -and $EnableOPENCV_CUDA -and (-Not $EnableCUDA)) { Write-Host "OPENCV with CUDA extension was requested, but CUDA is not enabled, you can enable it passing -EnableCUDA" $EnableOPENCV_CUDA = $false } -elseif ($EnableCUDA -and $EnableOPENCV_CUDA -and -not $EnableOPENCV) { +elseif ($EnableCUDA -and $EnableOPENCV_CUDA -and (-Not $EnableOPENCV)) { Write-Host "OPENCV with CUDA extension was requested, but OPENCV is not enabled, you can enable it passing -EnableOPENCV" $EnableOPENCV_CUDA = $false } -elseif ($EnableOPENCV_CUDA -and -not $EnableCUDA -and -not $EnableOPENCV) { +elseif ($EnableOPENCV_CUDA -and (-Not $EnableCUDA) -and (-Not $EnableOPENCV)) { Write-Host "OPENCV with CUDA extension was requested, but OPENCV and CUDA are not enabled, you can enable them passing -EnableOPENCV -EnableCUDA" $EnableOPENCV_CUDA = $false } @@ -225,12 +234,19 @@ else { Write-Host "Using git from ${GIT_EXE}" } -if ((Test-Path "$PSScriptRoot/.git") -and -not $DoNotUpdateDARKNET) { - $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" - $proc.WaitForExit() - $exitCode = $proc.ExitCode - if (-not $exitCode -eq 0) { - MyThrow("Updating darknet sources failed! Exited with $exitCode.") +if (Test-Path "$PSScriptRoot/.git") { + Write-Host "Darknet has been cloned with git and supports self-updating mechanism" + if ($DoNotUpdateDARKNET) { + Write-Host "Darknet will not self-update sources" -ForegroundColor Yellow + } + else { + Write-Host "Darknet will self-update sources, please pass -DoNotUpdateDARKNET to the script to disable" + $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Updating darknet sources failed! Exited with error code $exitCode.") + } } } @@ -240,6 +256,12 @@ if (-Not $CMAKE_EXE) { } else { Write-Host "Using CMake from ${CMAKE_EXE}" + $proc = Start-Process -NoNewWindow -PassThru -FilePath ${CMAKE_EXE} -ArgumentList "--version" + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("CMake version check failed! Exited with error code $exitCode.") + } } if (-Not $DoNotUseNinja) { @@ -250,7 +272,17 @@ if (-Not $DoNotUseNinja) { } else { Write-Host "Using Ninja from ${NINJA_EXE}" - $generator = "Ninja" + Write-Host -NoNewLine "Ninja version " + $proc = Start-Process -NoNewWindow -PassThru -FilePath ${NINJA_EXE} -ArgumentList "--version" + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + $DoNotUseNinja = $true + Write-Host "Unable to run Ninja previously found, using msbuild or make backends as a fallback" -ForegroundColor Yellow + } + else { + $generator = "Ninja" + } } } @@ -347,8 +379,8 @@ elseif ($UseVCPKG) { $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "clone https://github.com/microsoft/vcpkg" $proc.WaitForExit() $exitCode = $proc.ExitCode - if (-not $exitCode -eq 0) { - MyThrow("Cloning vcpkg sources failed! Exited with $exitCode.") + if (-not ($exitCode -eq 0)) { + MyThrow("Cloning vcpkg sources failed! 
Exited with error code $exitCode.") } } $vcpkg_path = "$PWD/vcpkg" @@ -361,19 +393,19 @@ else { $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=OFF" } -if ($UseVCPKG -and (Test-Path "$vcpkg_path/.git") -and -not $DoNotUpdateVCPKG) { +if ($UseVCPKG -and (Test-Path "$vcpkg_path/.git") -and (-Not $DoNotUpdateVCPKG)) { Push-Location $vcpkg_path $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" $proc.WaitForExit() $exitCode = $proc.ExitCode - if (-not $exitCode -eq 0) { - MyThrow("Updating vcpkg sources failed! Exited with $exitCode.") + if (-Not ($exitCode -eq 0)) { + MyThrow("Updating vcpkg sources failed! Exited with error code $exitCode.") } $proc = Start-Process -NoNewWindow -PassThru -FilePath $PWD/bootstrap-vcpkg${bootstrap_ext} -ArgumentList "-disableMetrics" $proc.WaitForExit() $exitCode = $proc.ExitCode - if (-not $exitCode -eq 0) { - MyThrow("Bootstrapping vcpkg failed! Exited with $exitCode.") + if (-Not ($exitCode -eq 0)) { + MyThrow("Bootstrapping vcpkg failed! Exited with error code $exitCode.") } Pop-Location } @@ -384,7 +416,7 @@ if ($UseVCPKG -and ($vcpkg_path.length -gt 40) -and ($IsWindows -or $IsWindowsPo Write-Host "You can use the subst command to ease the process if necessary" -ForegroundColor Yellow if (-Not $DisableInteractive) { $Result = Read-Host "Do you still want to continue? (yes/no)" - if ($Result -eq 'No' -or $Result -eq 'N' -or $Result -eq 'no' -or $Result -eq 'n') { + if (($Result -eq 'No') -or ($Result -eq 'N') -or ($Result -eq 'no') -or ($Result -eq 'n')) { MyThrow("Build aborted") } } @@ -462,19 +494,19 @@ if ($ForceCPP) { $additional_build_setup = $additional_build_setup + " -DBUILD_AS_CPP:BOOL=ON" } -if (-Not($EnableCUDA)) { +if (-Not $EnableCUDA) { $additional_build_setup = $additional_build_setup + " -DENABLE_CUDA:BOOL=OFF" } -if (-Not($EnableCUDNN)) { +if (-Not $EnableCUDNN) { $additional_build_setup = $additional_build_setup + " -DENABLE_CUDNN:BOOL=OFF" } -if (-Not($EnableOPENCV)) { +if (-Not $EnableOPENCV) { $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV:BOOL=OFF" } -if (-Not($EnableOPENCV_CUDA)) { +if (-Not $EnableOPENCV_CUDA) { $additional_build_setup = $additional_build_setup + " -DVCPKG_BUILD_OPENCV_WITH_CUDA:BOOL=OFF" } @@ -484,28 +516,31 @@ if (-Not $DoNotDeleteBuildFolder) { Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $build_folder } -New-Item -Path $build_folder -ItemType directory -Force +New-Item -Path $build_folder -ItemType directory -Force | Out-Null Set-Location $build_folder $cmake_args = "-G `"$generator`" ${additional_build_setup} -S .." +Write-Host "Configuring CMake project" -ForegroundColor Green Write-Host "CMake args: $cmake_args" $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList $cmake_args $proc.WaitForExit() $exitCode = $proc.ExitCode -if (-not $exitCode -eq 0) { - MyThrow("Config failed! Exited with $exitCode.") +if (-Not ($exitCode -eq 0)) { + MyThrow("Config failed! Exited with error code $exitCode.") } +Write-Host "Building CMake project" -ForegroundColor Green $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${number_of_build_workers} --target install" $proc.WaitForExit() $exitCode = $proc.ExitCode -if (-not $exitCode -eq 0) { - MyThrow("Config failed! Exited with $exitCode.") +if (-Not ($exitCode -eq 0)) { + MyThrow("Config failed! 
Exited with error code $exitCode.") } -Remove-Item DarknetConfig.cmake -Remove-Item DarknetConfigVersion.cmake +Remove-Item -Force -ErrorAction SilentlyContinue DarknetConfig.cmake +Remove-Item -Force -ErrorAction SilentlyContinue DarknetConfigVersion.cmake $dllfiles = Get-ChildItem ./${dllfolder}/*.dll if ($dllfiles) { Copy-Item $dllfiles .. } Set-Location .. Copy-Item cmake/Modules/*.cmake share/darknet/ +Write-Host "Build complete!" -ForegroundColor Green Pop-Location From bdb5fe3e91988b586382fa1674e6315032f142cb Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Tue, 18 May 2021 21:47:15 +0200 Subject: [PATCH 17/46] fixes for PowerShell ISE (#7715) --- build.ps1 | 76 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/build.ps1 b/build.ps1 index 6c7d297e9a1..38282cf8329 100755 --- a/build.ps1 +++ b/build.ps1 @@ -16,14 +16,16 @@ param ( [switch]$ForceCPP = $false, [switch]$ForceStaticLib = $false, [switch]$ForceSetupVS = $false, - [Int32]$ForceGCCVersion = 0 + [Int32]$ForceGCCVersion = 0, + [Int32]$NumberOfBuildWorkers = 8, + [string]$AdditionalBuildSetup = "" # "-DCMAKE_CUDA_ARCHITECTURES=30" ) -$build_ps1_version = "0.9" +$build_ps1_version = "0.9.1" Function MyThrow ($Message) { if ($DisableInteractive) { Write-Host $Message -ForegroundColor Red - exit + throw } else { # Check if running in PowerShell ISE @@ -32,7 +34,7 @@ Function MyThrow ($Message) { # Show MessageBox UI $Shell = New-Object -ComObject "WScript.Shell" $Shell.Popup($Message, 0, "OK", 0) - return + throw } $Ignore = @@ -69,21 +71,12 @@ Function MyThrow ($Message) { while (($null -eq $KeyInfo.VirtualKeyCode) -or ($Ignore -contains $KeyInfo.VirtualKeyCode)) { $KeyInfo = $Host.UI.RawUI.ReadKey("NoEcho, IncludeKeyDown") } - exit + Write-Host "" + throw } } Write-Host "Darknet build script version ${build_ps1_version}" -Write-Host -NoNewLine "PowerShell version:" -$PSVersionTable.PSVersion - -if ($PSVersionTable.PSVersion.Major -eq 5) { - $IsWindowsPowerShell = $true -} - -if ($PSVersionTable.PSVersion.Major -lt 5) { - MyThrow("Your PowerShell version is too old, please update it.") -} if ((-Not $DisableInteractive) -and (-Not $UseVCPKG)) { $Result = Read-Host "Enable vcpkg to install darknet dependencies (yes/no)" @@ -113,8 +106,17 @@ if ((-Not $DisableInteractive) -and (-Not $EnableOPENCV)) { } } -$number_of_build_workers = 8 -#$additional_build_setup = " -DCMAKE_CUDA_ARCHITECTURES=30" +Write-Host -NoNewLine "PowerShell version:" +$PSVersionTable.PSVersion + +if ($PSVersionTable.PSVersion.Major -eq 5) { + $IsWindowsPowerShell = $true +} + +if ($PSVersionTable.PSVersion.Major -lt 5) { + MyThrow("Your PowerShell version is too old, please update it.") +} + if ($IsLinux -or $IsMacOS) { $bootstrap_ext = ".sh" @@ -132,7 +134,7 @@ if ((-Not $IsWindows) -and (-Not $IsWindowsPowerShell) -and (-Not $ForceSetupVS) if ($ForceStaticLib) { Write-Host "Forced CMake to produce a static library" - $additional_build_setup = " -DBUILD_SHARED_LIBS=OFF " + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DBUILD_SHARED_LIBS=OFF " } if (($IsLinux -or $IsMacOS) -and ($ForceGCCVersion -gt 0)) { @@ -242,6 +244,7 @@ if (Test-Path "$PSScriptRoot/.git") { else { Write-Host "Darknet will self-update sources, please pass -DoNotUpdateDARKNET to the script to disable" $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" + $handle = $proc.Handle $proc.WaitForExit() $exitCode = $proc.ExitCode if (-Not ($exitCode -eq 0)) { @@ -257,6 +260,7 @@ if (-Not 
$CMAKE_EXE) { else { Write-Host "Using CMake from ${CMAKE_EXE}" $proc = Start-Process -NoNewWindow -PassThru -FilePath ${CMAKE_EXE} -ArgumentList "--version" + $handle = $proc.Handle $proc.WaitForExit() $exitCode = $proc.ExitCode if (-Not ($exitCode -eq 0)) { @@ -274,6 +278,7 @@ if (-Not $DoNotUseNinja) { Write-Host "Using Ninja from ${NINJA_EXE}" Write-Host -NoNewLine "Ninja version " $proc = Start-Process -NoNewWindow -PassThru -FilePath ${NINJA_EXE} -ArgumentList "--version" + $handle = $proc.Handle $proc.WaitForExit() $exitCode = $proc.ExitCode if (-Not ($exitCode -eq 0)) { @@ -358,25 +363,26 @@ function getLatestVisualStudioWithDesktopWorkloadVersion() { if ((Test-Path env:VCPKG_ROOT) -and $UseVCPKG) { $vcpkg_path = "$env:VCPKG_ROOT" Write-Host "Found vcpkg in VCPKG_ROOT: $vcpkg_path" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } elseif ((Test-Path "${env:WORKSPACE}/vcpkg") -and $UseVCPKG) { $vcpkg_path = "${env:WORKSPACE}/vcpkg" $env:VCPKG_ROOT = "${env:WORKSPACE}/vcpkg" Write-Host "Found vcpkg in WORKSPACE/vcpkg: $vcpkg_path" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } elseif (-not($null -eq ${RUNVCPKG_VCPKG_ROOT_OUT})) { if((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { $vcpkg_path = "${RUNVCPKG_VCPKG_ROOT_OUT}" $env:VCPKG_ROOT = "${RUNVCPKG_VCPKG_ROOT_OUT}" Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${vcpkg_path}" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } } elseif ($UseVCPKG) { if (-Not (Test-Path "$PWD/vcpkg")) { $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "clone https://github.com/microsoft/vcpkg" + $handle = $proc.Handle $proc.WaitForExit() $exitCode = $proc.ExitCode if (-not ($exitCode -eq 0)) { @@ -386,22 +392,24 @@ elseif ($UseVCPKG) { $vcpkg_path = "$PWD/vcpkg" $env:VCPKG_ROOT = "$PWD/vcpkg" Write-Host "Found vcpkg in $PWD/vcpkg: $PWD/vcpkg" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } else { Write-Host "Skipping vcpkg integration`n" -ForegroundColor Yellow - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=OFF" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=OFF" } if ($UseVCPKG -and (Test-Path "$vcpkg_path/.git") -and (-Not $DoNotUpdateVCPKG)) { Push-Location $vcpkg_path $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" + $handle = $proc.Handle $proc.WaitForExit() $exitCode = $proc.ExitCode if (-Not ($exitCode -eq 0)) { MyThrow("Updating vcpkg sources failed! 
Exited with error code $exitCode.") } $proc = Start-Process -NoNewWindow -PassThru -FilePath $PWD/bootstrap-vcpkg${bootstrap_ext} -ArgumentList "-disableMetrics" + $handle = $proc.Handle $proc.WaitForExit() $exitCode = $proc.ExitCode if (-Not ($exitCode -eq 0)) { @@ -444,15 +452,15 @@ if (-Not $DoNotSetupVS) { $selectConfig = " --config Release " if ($tokens[0] -eq "14") { $generator = "Visual Studio 14 2015" - $additional_build_setup = $additional_build_setup + " -T `"host=x64`" -A `"x64`"" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A `"x64`"" } elseif ($tokens[0] -eq "15") { $generator = "Visual Studio 15 2017" - $additional_build_setup = $additional_build_setup + " -T `"host=x64`" -A `"x64`"" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A `"x64`"" } elseif ($tokens[0] -eq "16") { $generator = "Visual Studio 16 2019" - $additional_build_setup = $additional_build_setup + " -T `"host=x64`" -A `"x64`"" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A `"x64`"" } else { MyThrow("Unknown Visual Studio version, unsupported configuration") @@ -491,23 +499,23 @@ if (-Not $IsMacOS -and $EnableCUDA) { } if ($ForceCPP) { - $additional_build_setup = $additional_build_setup + " -DBUILD_AS_CPP:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DBUILD_AS_CPP:BOOL=ON" } if (-Not $EnableCUDA) { - $additional_build_setup = $additional_build_setup + " -DENABLE_CUDA:BOOL=OFF" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_CUDA:BOOL=OFF" } if (-Not $EnableCUDNN) { - $additional_build_setup = $additional_build_setup + " -DENABLE_CUDNN:BOOL=OFF" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_CUDNN:BOOL=OFF" } if (-Not $EnableOPENCV) { - $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV:BOOL=OFF" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_OPENCV:BOOL=OFF" } if (-Not $EnableOPENCV_CUDA) { - $additional_build_setup = $additional_build_setup + " -DVCPKG_BUILD_OPENCV_WITH_CUDA:BOOL=OFF" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_BUILD_OPENCV_WITH_CUDA:BOOL=OFF" } $build_folder = "./build_release" @@ -518,17 +526,19 @@ if (-Not $DoNotDeleteBuildFolder) { New-Item -Path $build_folder -ItemType directory -Force | Out-Null Set-Location $build_folder -$cmake_args = "-G `"$generator`" ${additional_build_setup} -S .." +$cmake_args = "-G `"$generator`" ${AdditionalBuildSetup} -S .." Write-Host "Configuring CMake project" -ForegroundColor Green Write-Host "CMake args: $cmake_args" $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList $cmake_args +$handle = $proc.Handle $proc.WaitForExit() $exitCode = $proc.ExitCode if (-Not ($exitCode -eq 0)) { MyThrow("Config failed! Exited with error code $exitCode.") } Write-Host "Building CMake project" -ForegroundColor Green -$proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${number_of_build_workers} --target install" +$proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . 
${selectConfig} --parallel ${NumberOfBuildWorkers} --target install" +$handle = $proc.Handle $proc.WaitForExit() $exitCode = $proc.ExitCode if (-Not ($exitCode -eq 0)) { From ac8ebca0639f445ae456a1da05a13496ae0fcdc2 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Wed, 19 May 2021 22:46:20 +0200 Subject: [PATCH 18/46] [build.ps1] download ninja automatically if missing + other small updates (#7721) * self download ninja if not found on system and not requested to opt-out * add a flag to clean up vcpkg local binary cache to force restart from scratch * really delete if requested --- .gitignore | 2 ++ build.ps1 | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 174f0b5a378..716233d08c3 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,8 @@ cfg/ temp/ build/darknet/* build_*/ +ninja/ +ninja.zip vcpkg_installed/ !build/darknet/YoloWrapper.cs .fuse* diff --git a/build.ps1 b/build.ps1 index 38282cf8329..9e33c3f5186 100755 --- a/build.ps1 +++ b/build.ps1 @@ -15,13 +15,15 @@ param ( [switch]$DoNotUseNinja = $false, [switch]$ForceCPP = $false, [switch]$ForceStaticLib = $false, + [switch]$ForceVCPKGCacheRemoval = $false, [switch]$ForceSetupVS = $false, [Int32]$ForceGCCVersion = 0, [Int32]$NumberOfBuildWorkers = 8, [string]$AdditionalBuildSetup = "" # "-DCMAKE_CUDA_ARCHITECTURES=30" ) -$build_ps1_version = "0.9.1" +$build_ps1_version = "0.9.2" + Function MyThrow ($Message) { if ($DisableInteractive) { Write-Host $Message -ForegroundColor Red @@ -76,6 +78,28 @@ Function MyThrow ($Message) { } } +Function DownloadNinja() { + Write-Host "Unable to find Ninja, downloading a portable version on-the-fly" -ForegroundColor Yellow + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue ninja + Remove-Item -Force -ErrorAction SilentlyContinue ninja.zip + if ($IsWindows -or $IsWindowsPowerShell) { + $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip" + } + elseif ($IsLinux) { + $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip" + } + elseif ($IsMacOS) { + $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-mac.zip" + } + else { + MyThrow("Unknown OS, unsupported") + } + Invoke-RestMethod -Uri $url -Method Get -ContentType application/zip -OutFile "ninja.zip" + Expand-Archive -Path ninja.zip + Remove-Item -Force -ErrorAction SilentlyContinue ninja.zip +} + + Write-Host "Darknet build script version ${build_ps1_version}" if ((-Not $DisableInteractive) -and (-Not $UseVCPKG)) { @@ -271,10 +295,15 @@ else { if (-Not $DoNotUseNinja) { $NINJA_EXE = Get-Command ninja 2> $null | Select-Object -ExpandProperty Definition if (-Not $NINJA_EXE) { - $DoNotUseNinja = $true - Write-Host "Could not find Ninja, using msbuild or make backends as a fallback" -ForegroundColor Yellow + DownloadNinja + $env:PATH += ";${PSScriptRoot}/ninja" + $NINJA_EXE = Get-Command ninja 2> $null | Select-Object -ExpandProperty Definition + if (-Not $NINJA_EXE) { + $DoNotUseNinja = $true + Write-Host "Could not find Ninja, unable to download a portable ninja, using msbuild or make backends as a fallback" -ForegroundColor Yellow + } } - else { + if ($NINJA_EXE) { Write-Host "Using Ninja from ${NINJA_EXE}" Write-Host -NoNewLine "Ninja version " $proc = Start-Process -NoNewWindow -PassThru -FilePath ${NINJA_EXE} -ArgumentList "--version" @@ -287,6 +316,7 @@ if (-Not $DoNotUseNinja) { } else { $generator = "Ninja" + $AdditionalBuildSetup = 
$AdditionalBuildSetup + " -DCMAKE_BUILD_TYPE=Release" } } } @@ -430,6 +460,27 @@ if ($UseVCPKG -and ($vcpkg_path.length -gt 40) -and ($IsWindows -or $IsWindowsPo } } +if ($ForceVCPKGCacheRemoval -and (-Not $UseVCPKG)) { + Write-Host "VCPKG is not enabled, so local vcpkg binary cache will not be deleted even if requested" -ForegroundColor Yellow +} + +if ($UseVCPKG -and $ForceVCPKGCacheRemoval) { + if ($IsWindows -or $IsWindowsPowerShell) { + $vcpkgbinarycachepath = "$env:LOCALAPPDATA/vcpkg/archive" + } + elseif ($IsLinux) { + $vcpkgbinarycachepath = "$env:HOME/.cache/vcpkg/archive" + } + elseif ($IsMacOS) { + $vcpkgbinarycachepath = "$env:HOME/.cache/vcpkg/archive" + } + else { + MyThrow("Unknown OS, unsupported") + } + Write-Host "Removing local vcpkg binary cache from $vcpkgbinarycachepath" -ForegroundColor Yellow + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $vcpkgbinarycachepath +} + if (-Not $DoNotSetupVS) { if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) { $vsfound = getLatestVisualStudioWithDesktopWorkloadPath @@ -472,6 +523,7 @@ if (-Not $DoNotSetupVS) { } if ($DoNotSetupVS -and $DoNotUseNinja) { $generator = "Unix Makefiles" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DCMAKE_BUILD_TYPE=Release" } Write-Host "Setting up environment to use CMake generator: $generator" From 5853e51d604712918bd2fb23bab0ec82d19a88f9 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Wed, 26 May 2021 22:58:29 +0200 Subject: [PATCH 19/46] [build.ps1] fix 64bit build with developer powershell for VS19 (#7740) * [build.ps1] use x64 also on developer powershell for VS * improve handling of errors on windows platform * [build.ps1] bump version * improve logic handling in uselib_track --- CMakeLists.txt | 12 +++++++----- build.ps1 | 16 +++++++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49df87cf3a8..adfc709ac30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -237,17 +237,19 @@ set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${SHAREDLIB_CXX_FLAGS} ${CMAKE_CXX_ set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${SHAREDLIB_C_FLAGS} ${CMAKE_C_FLAGS}") if(OpenCV_FOUND) - if(ENABLE_CUDA AND NOT OpenCV_CUDA_VERSION) - set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE) - message(STATUS " -> darknet is fine for now, but uselib_track has been disabled!") - message(STATUS " -> Please rebuild OpenCV from sources with CUDA support to enable it") - elseif(ENABLE_CUDA AND OpenCV_CUDA_VERSION) + if(ENABLE_CUDA AND OpenCV_CUDA_VERSION) if(TARGET opencv_cudaoptflow) list(APPEND OpenCV_LINKED_COMPONENTS "opencv_cudaoptflow") endif() if(TARGET opencv_cudaimgproc) list(APPEND OpenCV_LINKED_COMPONENTS "opencv_cudaimgproc") endif() + elseif(ENABLE_CUDA AND NOT OpenCV_CUDA_VERSION) + set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE) + message(STATUS " -> darknet is fine for now, but uselib_track has been disabled!") + message(STATUS " -> Please rebuild OpenCV from sources with CUDA support to enable it") + else() + set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE) endif() endif() diff --git a/build.ps1 b/build.ps1 index 9e33c3f5186..61e70425783 100755 --- a/build.ps1 +++ b/build.ps1 @@ -22,7 +22,7 @@ param ( [string]$AdditionalBuildSetup = "" # "-DCMAKE_CUDA_ARCHITECTURES=30" ) -$build_ps1_version = "0.9.2" +$build_ps1_version = "0.9.3" Function MyThrow ($Message) { if ($DisableInteractive) { @@ -252,7 +252,7 @@ else { Push-Location $PSScriptRoot -$GIT_EXE = Get-Command 
git 2> $null | Select-Object -ExpandProperty Definition +$GIT_EXE = Get-Command "git" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition if (-Not $GIT_EXE) { MyThrow("Could not find git, please install it") } @@ -277,7 +277,7 @@ if (Test-Path "$PSScriptRoot/.git") { } } -$CMAKE_EXE = Get-Command cmake 2> $null | Select-Object -ExpandProperty Definition +$CMAKE_EXE = Get-Command "cmake" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition if (-Not $CMAKE_EXE) { MyThrow("Could not find CMake, please install it") } @@ -293,11 +293,11 @@ else { } if (-Not $DoNotUseNinja) { - $NINJA_EXE = Get-Command ninja 2> $null | Select-Object -ExpandProperty Definition + $NINJA_EXE = Get-Command "ninja" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition if (-Not $NINJA_EXE) { DownloadNinja $env:PATH += ";${PSScriptRoot}/ninja" - $NINJA_EXE = Get-Command ninja 2> $null | Select-Object -ExpandProperty Definition + $NINJA_EXE = Get-Command "ninja" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition if (-Not $NINJA_EXE) { $DoNotUseNinja = $true Write-Host "Could not find Ninja, unable to download a portable ninja, using msbuild or make backends as a fallback" -ForegroundColor Yellow @@ -482,7 +482,8 @@ if ($UseVCPKG -and $ForceVCPKGCacheRemoval) { } if (-Not $DoNotSetupVS) { - if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) { + $CL_EXE = Get-Command "cl" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition + if ((-Not $CL_EXE) -or ($CL_EXE -match "HostX86\\x86") -or ($CL_EXE -match "HostX64\\x86")) { $vsfound = getLatestVisualStudioWithDesktopWorkloadPath Write-Host "Found VS in ${vsfound}" Push-Location "${vsfound}\Common7\Tools" @@ -528,7 +529,8 @@ if ($DoNotSetupVS -and $DoNotUseNinja) { Write-Host "Setting up environment to use CMake generator: $generator" if (-Not $IsMacOS -and $EnableCUDA) { - if ($null -eq (Get-Command "nvcc" -ErrorAction SilentlyContinue)) { + $NVCC_EXE = Get-Command "nvcc" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition + if (-Not $NVCC_EXE) { if (Test-Path env:CUDA_PATH) { $env:PATH += ";${env:CUDA_PATH}/bin" Write-Host "Found cuda in ${env:CUDA_PATH}" From 8c85eb7784f0d85a1515dc2499479bbf118f2210 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Fri, 4 Jun 2021 14:02:07 +0200 Subject: [PATCH 20/46] [build.ps1/setup.sh] do not export vcpkg_root anymore, add setup scripts to CI (#7751) * [build.ps1] do not export vcpkg_root anymore * [README.md] add a build step related to Set-ExecutionPolicy on Windows * use scope currentuser to avoid admin powershell * simplify even more set-executionpolicy command order * remove unnecessary decoration to shell commands to improve user experience with copy button * write instructions to build using only cmake * Update README.md * add target install * improve setup scripts for future tests * improve setup.sh script for more tool installations and add command line flags handling * add CI tests for setup scripts * use nuget artifact cache to improve CI build times * disable interactivity for build script when called from setup script, add kitware gpg key in CI * enable cudnn in setup.sh * add a workaround for missing nvidia drivers in CI * setup CUDA env variables * remove some very slow builds from travis and move them to github actions * chmod sh scripts * use deploy-cuda scripts also in PR pipelines * install asm tools * restore links missing for CI when using deploy-cuda.sh * enable cudnn on 
ubuntu ci pipelines * pass CUDA env variables * add missing cuda install for opencv2 and opencv3 ci pipelines * remove unnecessary steps Co-authored-by: Alexey --- .github/workflows/ccpp.yml | 182 ++++++++--- .github/workflows/on_pr.yml | 170 ++++++++--- .gitignore | 1 + .travis.yml | 60 ---- CMakeLists.txt | 39 ++- README.md | 555 +++++++++++++++++----------------- build.ps1 | 30 +- scripts/deploy-cuda.sh | 38 +++ scripts/dice_label.sh | 0 scripts/get_coco2017.sh | 0 scripts/get_coco_dataset.sh | 0 scripts/get_imagenet_train.sh | 0 scripts/imagenet_label.sh | 0 scripts/install_OpenCV4.sh | 0 scripts/setup.ps1 | 32 +- scripts/setup.sh | 119 ++++---- 16 files changed, 718 insertions(+), 508 deletions(-) create mode 100755 scripts/deploy-cuda.sh mode change 100644 => 100755 scripts/dice_label.sh mode change 100644 => 100755 scripts/get_coco2017.sh mode change 100644 => 100755 scripts/get_coco_dataset.sh mode change 100644 => 100755 scripts/get_imagenet_train.sh mode change 100644 => 100755 scripts/imagenet_label.sh mode change 100644 => 100755 scripts/install_OpenCV4.sh diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index dd8a98dd856..b26e0701ac0 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -17,24 +17,13 @@ jobs: run: sudo apt install libopencv-dev - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0' run: | @@ -72,37 +61,21 @@ jobs: make clean - ubuntu-vcpkg-cuda: + ubuntu-vcpkg-opencv4-cuda: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - - name: Update apt - run: sudo apt update - - name: Install dependencies - run: sudo apt install yasm nasm - - uses: lukka/get-cmake@latest - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb 
http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash @@ -123,7 +96,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -143,6 +116,82 @@ jobs: path: ${{ github.workspace }}/uselib* + ubuntu-vcpkg-opencv3-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-vcpkg-opencv2-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + 
./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateDARKNET + + ubuntu: runs-on: ubuntu-20.04 steps: @@ -195,24 +244,13 @@ jobs: - uses: lukka/get-cmake@latest - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'Build' shell: pwsh @@ -221,7 +259,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + run: ./build.ps1 -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -253,6 +291,28 @@ jobs: run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + ubuntu-setup-sh: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Setup' + shell: bash + run: ./scripts/setup.sh -InstallCUDA -BypassDRIVER + + osx-vcpkg: runs-on: macos-latest steps: @@ -419,6 +479,28 @@ jobs: path: ${{ github.workspace }}/uselib* + win-setup-ps1: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts 
backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Setup' + shell: pwsh + run: ./scripts/setup.ps1 -InstallCUDA + + win-intlibs-cpp: runs-on: windows-latest steps: diff --git a/.github/workflows/on_pr.yml b/.github/workflows/on_pr.yml index 198d84fc4e0..42c03d696be 100644 --- a/.github/workflows/on_pr.yml +++ b/.github/workflows/on_pr.yml @@ -17,24 +17,13 @@ jobs: run: sudo apt install libopencv-dev - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0' run: | @@ -72,37 +61,21 @@ jobs: make clean - ubuntu-vcpkg-cuda: + ubuntu-vcpkg-opencv4-cuda: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - - name: Update apt - run: sudo apt update - - name: Install dependencies - run: sudo apt install yasm nasm - - uses: lukka/get-cmake@latest - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 
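          # Assumption, based on this PR's "workaround for missing nvidia drivers in CI":
          # hosted runners have no NVIDIA driver, so the CUDA stub libcuda.so is also
          # exposed as libcuda.so.1 / libcuda.so to let CUDA-linked binaries load.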
sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash @@ -120,7 +93,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -140,6 +113,76 @@ jobs: path: ${{ github.workspace }}/uselib* + ubuntu-vcpkg-opencv3-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-vcpkg-opencv2-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateDARKNET + + ubuntu: runs-on: ubuntu-20.04 steps: @@ -192,24 +235,13 @@ jobs: - uses: lukka/get-cmake@latest - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo 
apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'Build' shell: pwsh @@ -218,7 +250,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + run: ./build.ps1 -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -250,6 +282,25 @@ jobs: run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + ubuntu-setup-sh: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Setup' + shell: bash + run: ./scripts/setup.sh -InstallCUDA -BypassDRIVER + + osx-vcpkg: runs-on: macos-latest steps: @@ -410,6 +461,25 @@ jobs: path: ${{ github.workspace }}/uselib* + win-setup-ps1: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Setup' + shell: pwsh + run: ./scripts/setup.ps1 -InstallCUDA + + win-intlibs-cpp: runs-on: windows-latest steps: diff --git a/.gitignore b/.gitignore index 716233d08c3..916cfb88461 100644 --- a/.gitignore +++ b/.gitignore @@ -38,6 +38,7 @@ build/.ninja_deps build/.ninja_log build/Makefile */vcpkg-manifest-install.log +build.log # OS Generated # .DS_Store* diff --git a/.travis.yml b/.travis.yml index 447a72a179d..f208498dbcd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,32 +16,6 @@ matrix: - additional_defines=" -DENABLE_CUDA=OFF -DENABLE_CUDNN=OFF -DENABLE_OPENCV=OFF" - MATRIX_EVAL="" - - os: osx - compiler: gcc - name: macOS - gcc (llvm backend) - opencv@2 - osx_image: xcode12.3 - env: - - OpenCV_DIR="/usr/local/opt/opencv@2/" - - additional_defines="-DOpenCV_DIR=${OpenCV_DIR} 
-DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv@2" - - - os: osx - compiler: gcc - name: macOS - gcc (llvm backend) - opencv@3 - osx_image: xcode12.3 - env: - - OpenCV_DIR="/usr/local/opt/opencv@3/" - - additional_defines="-DOpenCV_DIR=${OpenCV_DIR} -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv@3" - - - os: osx - compiler: gcc - name: macOS - gcc (llvm backend) - opencv(latest) - osx_image: xcode12.3 - env: - - additional_defines=" -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv" - - os: osx compiler: clang name: macOS - clang @@ -58,40 +32,6 @@ matrix: - additional_defines="-DBUILD_AS_CPP:BOOL=TRUE -DENABLE_CUDA=OFF -DENABLE_CUDNN=OFF -DENABLE_OPENCV=OFF" - MATRIX_EVAL="" - - os: osx - compiler: clang - name: macOS - clang - opencv@2 - osx_image: xcode12.3 - env: - - OpenCV_DIR="/usr/local/opt/opencv@2/" - - additional_defines="-DOpenCV_DIR=${OpenCV_DIR} -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv@2" - - - os: osx - compiler: clang - name: macOS - clang - opencv@3 - osx_image: xcode12.3 - env: - - OpenCV_DIR="/usr/local/opt/opencv@3/" - - additional_defines="-DOpenCV_DIR=${OpenCV_DIR} -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv@3" - - - os: osx - compiler: clang - name: macOS - clang - opencv(latest) - osx_image: xcode12.3 - env: - - additional_defines=" -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv" - - - os: osx - compiler: clang - name: macOS - clang - opencv(latest) - libomp - osx_image: xcode12.3 - env: - - additional_defines=" -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv libomp" - - os: linux compiler: clang dist: bionic diff --git a/CMakeLists.txt b/CMakeLists.txt index adfc709ac30..b3630f516dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,19 +22,48 @@ option(ENABLE_CUDNN_HALF "Enable CUDNN Half precision" ON) option(ENABLE_ZED_CAMERA "Enable ZED Camera support" ON) option(ENABLE_VCPKG_INTEGRATION "Enable VCPKG integration" ON) option(VCPKG_BUILD_OPENCV_WITH_CUDA "Build OpenCV with CUDA extension integration" ON) +option(VCPKG_USE_OPENCV2 "Use legacy OpenCV 2" OFF) +option(VCPKG_USE_OPENCV3 "Use legacy OpenCV 3" OFF) +option(VCPKG_USE_OPENCV4 "Use OpenCV 4" ON) -if(VCPKG_BUILD_OPENCV_WITH_CUDA AND NOT APPLE) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda") +if(VCPKG_USE_OPENCV4 AND VCPKG_USE_OPENCV2) + message(STATUS "You required vcpkg feature related to OpenCV 2 but forgot to turn off those for OpenCV 4, doing that for you") + set(VCPKG_USE_OPENCV4 OFF CACHE BOOL "Use OpenCV 4" FORCE) endif() +if(VCPKG_USE_OPENCV4 AND VCPKG_USE_OPENCV3) + message(STATUS "You required vcpkg feature related to OpenCV 3 but forgot to turn off those for OpenCV 4, doing that for you") + set(VCPKG_USE_OPENCV4 OFF CACHE BOOL "Use OpenCV 4" FORCE) +endif() +if(VCPKG_USE_OPENCV2 AND VCPKG_USE_OPENCV3) + message(STATUS "You required vcpkg features related to both OpenCV 2 and OpenCV 3. 
Impossible to satisfy, keeping only OpenCV 3") + set(VCPKG_USE_OPENCV2 OFF CACHE BOOL "Use legacy OpenCV 2" FORCE) +endif() + if(ENABLE_CUDA AND NOT APPLE) list(APPEND VCPKG_MANIFEST_FEATURES "cuda") endif() -if(ENABLE_OPENCV) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv-base") -endif() if(ENABLE_CUDNN AND ENABLE_CUDA AND NOT APPLE) list(APPEND VCPKG_MANIFEST_FEATURES "cudnn") endif() +if(ENABLE_OPENCV) + if(VCPKG_BUILD_OPENCV_WITH_CUDA AND NOT APPLE) + if(VCPKG_USE_OPENCV4) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda") + elseif(VCPKG_USE_OPENCV3) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv3-cuda") + elseif(VCPKG_USE_OPENCV2) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv2-cuda") + endif() + else() + if(VCPKG_USE_OPENCV4) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv-base") + elseif(VCPKG_USE_OPENCV3) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv3-base") + elseif(VCPKG_USE_OPENCV2) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv2-base") + endif() + endif() +endif() if(NOT CMAKE_HOST_SYSTEM_PROCESSOR AND NOT WIN32) execute_process(COMMAND "uname" "-m" OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/README.md b/README.md index acd2506d16c..cc85070b24a 100644 --- a/README.md +++ b/README.md @@ -8,16 +8,16 @@ Paper Scaled YOLO v4: https://arxiv.org/abs/2011.08036 use to reproduce results More details in articles on medium: -* [Scaled_YOLOv4](https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8) -* [YOLOv4](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) +- [Scaled_YOLOv4](https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8) +- [YOLOv4](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) Manual: https://github.com/AlexeyAB/darknet/wiki Discussion: -* [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) -* [Google-groups](https://groups.google.com/forum/#!forum/darknet) -* [Discord](https://discord.gg/zSq8rtW) +- [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) +- [Google-groups](https://groups.google.com/forum/#!forum/darknet) +- [Discord](https://discord.gg/zSq8rtW) About Darknet framework: http://pjreddie.com/darknet/ @@ -31,13 +31,13 @@ About Darknet framework: http://pjreddie.com/darknet/ [![colab](https://user-images.githubusercontent.com/4096485/86174089-b2709f80-bb29-11ea-9faf-3d8dc668a1a5.png)](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) [![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg) -* [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) -* [Requirements (and how to install dependencies)](#requirements) -* [Pre-trained models](#pre-trained-models) -* [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) -* [Explanations in 
issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) -* [Yolo v4 in other frameworks (TensorRT, TensorFlow, PyTorch, OpenVINO, OpenCV-dnn, TVM,...)](#yolo-v4-in-other-frameworks) -* [Datasets](#datasets) +- [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) +- [Requirements (and how to install dependencies)](#requirements) +- [Pre-trained models](#pre-trained-models) +- [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) +- [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) +- [Yolo v4 in other frameworks (TensorRT, TensorFlow, PyTorch, OpenVINO, OpenCV-dnn, TVM,...)](#yolo-v4-in-other-frameworks) +- [Datasets](#datasets) - [Yolo v4, v3 and v2 for Windows and Linux](#yolo-v4-v3-and-v2-for-windows-and-linux) - [(neural networks for object detection)](#neural-networks-for-object-detection) @@ -46,39 +46,38 @@ About Darknet framework: http://pjreddie.com/darknet/ - [How to evaluate AP of YOLOv4 on the MS COCO evaluation server](#how-to-evaluate-ap-of-yolov4-on-the-ms-coco-evaluation-server) - [How to evaluate FPS of YOLOv4 on GPU](#how-to-evaluate-fps-of-yolov4-on-gpu) - [Pre-trained models](#pre-trained-models) - - [Requirements](#requirements) + - [Requirements for Windows, Linux and macOS](#requirements-for-windows-linux-and-macos) - [Yolo v4 in other frameworks](#yolo-v4-in-other-frameworks) - [Datasets](#datasets) - [Improvements in this repository](#improvements-in-this-repository) - [How to use on the command line](#how-to-use-on-the-command-line) - [For using network video-camera mjpeg-stream with any Android smartphone](#for-using-network-video-camera-mjpeg-stream-with-any-android-smartphone) - [How to compile on Linux/macOS (using `CMake`)](#how-to-compile-on-linuxmacos-using-cmake) - - [Using `vcpkg`](#using-vcpkg) - - [Using libraries manually provided](#using-libraries-manually-provided) + - [Using also PowerShell](#using-also-powershell) - [How to compile on Linux (using `make`)](#how-to-compile-on-linux-using-make) - [How to compile on Windows (using `CMake`)](#how-to-compile-on-windows-using-cmake) - [How to compile on Windows (using `vcpkg`)](#how-to-compile-on-windows-using-vcpkg) - [How to train with multi-GPU](#how-to-train-with-multi-gpu) - [How to train (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) - - [How to train tiny-yolo (to detect your custom objects):](#how-to-train-tiny-yolo-to-detect-your-custom-objects) - - [When should I stop training:](#when-should-i-stop-training) + - [How to train tiny-yolo (to detect your custom objects)](#how-to-train-tiny-yolo-to-detect-your-custom-objects) + - [When should I stop training](#when-should-i-stop-training) - [Custom object detection](#custom-object-detection) - [How to improve object detection](#how-to-improve-object-detection) - [How to mark bounded boxes of objects and create annotation files](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) - [How to use Yolo as DLL and SO libraries](#how-to-use-yolo-as-dll-and-so-libraries) -![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) ![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 ---- 
-![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 +![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 tkDNN-TensorRT accelerates YOLOv4 **~2x** times for batch=1 and **3x-4x** times for batch=4. -* tkDNN: https://github.com/ceccocats/tkDNN -* OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf +- tkDNN: https://github.com/ceccocats/tkDNN +- OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf ### GeForce RTX 2080 Ti @@ -91,13 +90,13 @@ tkDNN-TensorRT accelerates YOLOv4 **~2x** times for batch=1 and **3x-4x** times |Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | |Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | -* Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) -* Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) -* CSPNet: [paper](https://arxiv.org/abs/1911.11929) and [map_fps](https://user-images.githubusercontent.com/4096485/71702416-6645dc00-2de0-11ea-8d65-de7d4b604021.png) comparison: https://github.com/WongKinYiu/CrossStagePartialNetworks -* Yolo v3 on MS COCO: [Speed / Accuracy (mAP@0.5) chart](https://user-images.githubusercontent.com/4096485/52151356-e5d4a380-2683-11e9-9d7d-ac7bc192c477.jpg) -* Yolo v3 on MS COCO (Yolo v3 vs RetinaNet) - Figure 3: https://arxiv.org/pdf/1804.02767v1.pdf -* Yolo v2 on Pascal VOC 2007: https://hsto.org/files/a24/21e/068/a2421e0689fb43f08584de9d44c2215f.jpg -* Yolo v2 on Pascal VOC 2012 (comp4): https://hsto.org/files/3a6/fdf/b53/3a6fdfb533f34cee9b52bdd9bb0b19d9.jpg +- Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) +- Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) +- CSPNet: [paper](https://arxiv.org/abs/1911.11929) and [map_fps](https://user-images.githubusercontent.com/4096485/71702416-6645dc00-2de0-11ea-8d65-de7d4b604021.png) comparison: https://github.com/WongKinYiu/CrossStagePartialNetworks +- Yolo v3 on MS COCO: [Speed / Accuracy (mAP@0.5) chart](https://user-images.githubusercontent.com/4096485/52151356-e5d4a380-2683-11e9-9d7d-ac7bc192c477.jpg) +- Yolo v3 on MS COCO (Yolo v3 vs RetinaNet) - Figure 3: https://arxiv.org/pdf/1804.02767v1.pdf +- Yolo v2 on Pascal VOC 2007: https://hsto.org/files/a24/21e/068/a2421e0689fb43f08584de9d44c2215f.jpg +- Yolo v2 on Pascal VOC 2012 (comp4): https://hsto.org/files/3a6/fdf/b53/3a6fdfb533f34cee9b52bdd9bb0b19d9.jpg #### Youtube video of results @@ -134,9 +133,9 @@ eval=coco 3. Get any .avi/.mp4 video file (preferably not more than 1920x1080 to avoid bottlenecks in CPU performance) 4. 
Run one of two commands and look at the AVG FPS: -* include video_capturing + NMS + drawing_bboxes: +- include video_capturing + NMS + drawing_bboxes: `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -dont_show -ext_output` -* exclude video_capturing + NMS + drawing_bboxes: +- exclude video_capturing + NMS + drawing_bboxes: `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -benchmark` #### Pre-trained models @@ -145,52 +144,52 @@ There are weights-file for different cfg-files (trained for MS COCO dataset): FPS on RTX 2070 (R) and Tesla V100 (V): -* [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - 640x640 - **67.9% mAP@0.5 (49.4% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights) - * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166 +- [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - 640x640 - **67.9% mAP@0.5 (49.4% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights) + - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166 -* [yolov4-csp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg) - 202 MB: [yolov4-csp.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights) paper [Scaled Yolo v4](https://arxiv.org/abs/2011.08036) +- [yolov4-csp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg) - 202 MB: [yolov4-csp.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights) paper [Scaled Yolo v4](https://arxiv.org/abs/2011.08036) just change `width=` and `height=` parameters in `yolov4-csp.cfg` file and use the same `yolov4-csp.weights` file for all cases: - * `width=640 height=640` in cfg: **66.2% mAP@0.5 (47.5% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops - * `width=512 height=512` in cfg: **64.8% mAP@0.5 (46.2% AP@0.5:0.95) - 93(V) FPS** - 77 (39 FMA) BFlops - * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142 + - `width=640 height=640` in cfg: **66.2% mAP@0.5 (47.5% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops + - `width=512 height=512` in cfg: **64.8% mAP@0.5 (46.2% AP@0.5:0.95) - 93(V) FPS** - 77 (39 FMA) BFlops + - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142 -* [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) +- [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror 
[yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) just change `width=` and `height=` parameters in `yolov4.cfg` file and use the same `yolov4.weights` file for all cases: - * `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops - * `width=512 height=512` in cfg: **64.9% mAP@0.5 (43.0% AP@0.5:0.95) - 45(R) FPS / 83(V) FPS** - 91.1 BFlops - * `width=416 height=416` in cfg: **62.8% mAP@0.5 (41.2% AP@0.5:0.95) - 55(R) FPS / 96(V) FPS** - 60.1 BFlops - * `width=320 height=320` in cfg: **60% mAP@0.5 ( 38% AP@0.5:0.95) - 63(R) FPS / 123(V) FPS** - 35.5 BFlops + - `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops + - `width=512 height=512` in cfg: **64.9% mAP@0.5 (43.0% AP@0.5:0.95) - 45(R) FPS / 83(V) FPS** - 91.1 BFlops + - `width=416 height=416` in cfg: **62.8% mAP@0.5 (41.2% AP@0.5:0.95) - 55(R) FPS / 96(V) FPS** - 60.1 BFlops + - `width=320 height=320` in cfg: **60% mAP@0.5 ( 38% AP@0.5:0.95) - 63(R) FPS / 123(V) FPS** - 35.5 BFlops -* [yolov4-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg) - **40.2% mAP@0.5 - 371(1080Ti) FPS / 330(RTX2070) FPS** - 6.9 BFlops - 23.1 MB: [yolov4-tiny.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights) +- [yolov4-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg) - **40.2% mAP@0.5 - 371(1080Ti) FPS / 330(RTX2070) FPS** - 6.9 BFlops - 23.1 MB: [yolov4-tiny.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights) -* [enet-coco.cfg (EfficientNetB0-Yolov3)](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/enet-coco.cfg) - **45.5% mAP@0.5 - 55(R) FPS** - 3.7 BFlops - 18.3 MB: [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) +- [enet-coco.cfg (EfficientNetB0-Yolov3)](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/enet-coco.cfg) - **45.5% mAP@0.5 - 55(R) FPS** - 3.7 BFlops - 18.3 MB: [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) -* [yolov3-openimages.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-openimages.cfg) - 247 MB - 18(R) FPS - OpenImages dataset: [yolov3-openimages.weights](https://pjreddie.com/media/files/yolov3-openimages.weights) +- [yolov3-openimages.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-openimages.cfg) - 247 MB - 18(R) FPS - OpenImages dataset: [yolov3-openimages.weights](https://pjreddie.com/media/files/yolov3-openimages.weights)
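A minimal end-to-end sketch for trying one of the models above (assuming a finished build in the repository root and the bundled sample image `data/dog.jpg`):

```bash
# fetch the pre-trained YOLOv4 weights listed above (245 MB)
wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights

# detect objects on a single image, dropping boxes below 25% confidence
./darknet detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights data/dog.jpg -thresh 0.25
```

To trade speed for accuracy, edit `width=` and `height=` in `cfg/yolov4.cfg` (any multiple of 32) and rerun; as noted above, the same `yolov4.weights` file works for all resolutions.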
CLICK ME - Yolo v3 models -* [csresnext50-panet-spp-original-optimal.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp-original-optimal.cfg) - **65.4% mAP@0.5 (43.2% AP@0.5:0.95) - 32(R) FPS** - 100.5 BFlops - 217 MB: [csresnext50-panet-spp-original-optimal_final.weights](https://drive.google.com/open?id=1_NnfVgj0EDtb_WLNoXV8Mo7WKgwdYZCc) +- [csresnext50-panet-spp-original-optimal.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp-original-optimal.cfg) - **65.4% mAP@0.5 (43.2% AP@0.5:0.95) - 32(R) FPS** - 100.5 BFlops - 217 MB: [csresnext50-panet-spp-original-optimal_final.weights](https://drive.google.com/open?id=1_NnfVgj0EDtb_WLNoXV8Mo7WKgwdYZCc) -* [yolov3-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg) - **60.6% mAP@0.5 - 38(R) FPS** - 141.5 BFlops - 240 MB: [yolov3-spp.weights](https://pjreddie.com/media/files/yolov3-spp.weights) +- [yolov3-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg) - **60.6% mAP@0.5 - 38(R) FPS** - 141.5 BFlops - 240 MB: [yolov3-spp.weights](https://pjreddie.com/media/files/yolov3-spp.weights) -* [csresnext50-panet-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp.cfg) - **60.0% mAP@0.5 - 44 FPS** - 71.3 BFlops - 217 MB: [csresnext50-panet-spp_final.weights](https://drive.google.com/file/d/1aNXdM8qVy11nqTcd2oaVB3mf7ckr258-/view?usp=sharing) +- [csresnext50-panet-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp.cfg) - **60.0% mAP@0.5 - 44 FPS** - 71.3 BFlops - 217 MB: [csresnext50-panet-spp_final.weights](https://drive.google.com/file/d/1aNXdM8qVy11nqTcd2oaVB3mf7ckr258-/view?usp=sharing) -* [yolov3.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg) - **55.3% mAP@0.5 - 66(R) FPS** - 65.9 BFlops - 236 MB: [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) +- [yolov3.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg) - **55.3% mAP@0.5 - 66(R) FPS** - 65.9 BFlops - 236 MB: [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) -* [yolov3-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg) - **33.1% mAP@0.5 - 345(R) FPS** - 5.6 BFlops - 33.7 MB: [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) +- [yolov3-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg) - **33.1% mAP@0.5 - 345(R) FPS** - 5.6 BFlops - 33.7 MB: [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) -* [yolov3-tiny-prn.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny-prn.cfg) - **33.1% mAP@0.5 - 370(R) FPS** - 3.5 BFlops - 18.8 MB: [yolov3-tiny-prn.weights](https://drive.google.com/file/d/18yYZWyKbo4XSDVyztmsEcF9B_6bxrhUY/view?usp=sharing) +- [yolov3-tiny-prn.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny-prn.cfg) - **33.1% mAP@0.5 - 370(R) FPS** - 3.5 BFlops - 18.8 MB: [yolov3-tiny-prn.weights](https://drive.google.com/file/d/18yYZWyKbo4XSDVyztmsEcF9B_6bxrhUY/view?usp=sharing)
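The FPS figures quoted for these models can be approximated with the built-in benchmark mode; a minimal sketch, assuming Darknet was compiled with OpenCV and `test.mp4` is any local video file:

```bash
# fetch the tiny model listed above (33.7 MB)
wget https://pjreddie.com/media/files/yolov3-tiny.weights

# measure network-only FPS: -benchmark excludes video capturing, NMS and drawing of bboxes
./darknet detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4 -benchmark
```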
CLICK ME - Yolo v2 models -* `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights -* `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights -* `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights -* `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights -* `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights +- `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights +- `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights +- `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights +- `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights +- `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights
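On GPUs with very little memory, the 1 GB entries above are the practical choice; a minimal sketch, assuming the matching cfg files from `darknet/cfg/` are in place:

```bash
# yolov2-tiny fits in about 1 GB of GPU-RAM (see the list above)
wget https://pjreddie.com/media/files/yolov2-tiny.weights
./darknet detector test cfg/coco.data cfg/yolov2-tiny.cfg yolov2-tiny.weights data/dog.jpg
```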
@@ -198,76 +197,76 @@ Put it near compiled: darknet.exe You can get cfg-files by path: `darknet/cfg/` -### Requirements +### Requirements for Windows, Linux and macOS -* **CMake >= 3.18**: https://cmake.org/download/ -* **Powershell** (already installed on windows): https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell -* **CUDA >= 10.2**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) -* **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) -* **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) -* **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported +- **CMake >= 3.18**: https://cmake.org/download/ +- **Powershell** (already installed on windows): https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell +- **CUDA >= 10.2**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) +- **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) +- **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... 
as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) +- **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported ### Yolo v4 in other frameworks -* **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4 -* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Android: https://github.com/hunglc007/tensorflow-yolov4-tflite +- **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4 +- **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Android: https://github.com/hunglc007/tensorflow-yolov4-tflite Official TF models: https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/yolo For YOLOv4 - convert `yolov4.weights`/`cfg` files to `yolov4.pb` by using [TNTWEN](https://github.com/TNTWEN/OpenVINO-YOLOV4) project, and to `yolov4.tflite` [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) -* **OpenCV** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) -* **Intel OpenVINO 2021.2:** supports YOLOv4 (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): https://devmesh.intel.com/projects/openvino-yolov4-49c756 read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) (for [Scaled-YOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-large) models use https://github.com/Chen-MingChang/pytorch_YOLO_OpenVINO_demo ) -* **PyTorch > ONNX**: - * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) - * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) - * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) - * [YOLOv5](https://github.com/ultralytics/yolov5) -* **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ -* **TensorRT+tkDNN**: https://github.com/ceccocats/tkDNN#fps-results -* **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo read [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) and [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). 
Additionally [jkjung-avt/tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos) or [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) -* **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt -* **DirectML** https://github.com/microsoft/DirectML/tree/master/Samples/yolov4 -* **OpenCL** (Intel, AMD, Mali GPUs for macOS & GNU/Linux) https://github.com/sowson/darknet -* **HIP** for Training and Detection on AMD GPU https://github.com/os-hackathon/darknet -* **ROS** (Robot Operating System) https://github.com/engcang/ros-yolo-sort -* **Xilinx Zynq Ultrascale+ Deep Learning Processor (DPU) ZCU102/ZCU104:** https://github.com/Xilinx/Vitis-In-Depth-Tutorial/tree/master/Machine_Learning/Design_Tutorials/07-yolov4-tutorial -* **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) -* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backend (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about -* **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn -* **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite -* **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron +- **OpenCV** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) +- **Intel OpenVINO 2021.2:** supports YOLOv4 (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): https://devmesh.intel.com/projects/openvino-yolov4-49c756 read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) (for [Scaled-YOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-large) models use https://github.com/Chen-MingChang/pytorch_YOLO_OpenVINO_demo ) +- **PyTorch > ONNX**: + - [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) + - [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) + - [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) + - [YOLOv5](https://github.com/ultralytics/yolov5) +- **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ +- **TensorRT+tkDNN**: https://github.com/ceccocats/tkDNN#fps-results +- **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo read [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) and 
[PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). Additionally [jkjung-avt/tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos) or [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx)
+- **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt
+- **DirectML** https://github.com/microsoft/DirectML/tree/master/Samples/yolov4
+- **OpenCL** (Intel, AMD, Mali GPUs for macOS & GNU/Linux) https://github.com/sowson/darknet
+- **HIP** for Training and Detection on AMD GPU https://github.com/os-hackathon/darknet
+- **ROS** (Robot Operating System) https://github.com/engcang/ros-yolo-sort
+- **Xilinx Zynq Ultrascale+ Deep Learning Processor (DPU) ZCU102/ZCU104:** https://github.com/Xilinx/Vitis-In-Depth-Tutorial/tree/master/Machine_Learning/Design_Tutorials/07-yolov4-tutorial
+- **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for a TensorFlow-based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/)
+- **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about
+- **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn
+- **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite
+- **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron

 #### Datasets

-* MS COCO: use `./scripts/get_coco_dataset.sh` to get labeled MS COCO detection dataset
-* OpenImages: use `python ./scripts/get_openimages_dataset.py` for labeling train detection dataset
-* Pascal VOC: use `python ./scripts/voc_label.py` for labeling Train/Test/Val detection datasets
-* ILSVRC2012 (ImageNet classification): use `./scripts/get_imagenet_train.sh` (also `imagenet_label.sh` for labeling valid set)
-* German/Belgium/Russian/LISA/MASTIF Traffic Sign Datasets for Detection - use this parsers: https://github.com/angeligareta/Datasets2Darknet#detection-task
-* List of other datasets: https://github.com/AlexeyAB/darknet/tree/master/scripts#datasets
+- MS COCO: use `./scripts/get_coco_dataset.sh` to get the labeled MS COCO detection dataset
+- OpenImages: use `python ./scripts/get_openimages_dataset.py` for labeling the train detection dataset
+- Pascal VOC: use `python ./scripts/voc_label.py` for labeling Train/Test/Val detection datasets
+- ILSVRC2012 (ImageNet classification): use `./scripts/get_imagenet_train.sh` (also `imagenet_label.sh` for labeling the valid set)
+- German/Belgium/Russian/LISA/MASTIF Traffic Sign Datasets for Detection - use these parsers: https://github.com/angeligareta/Datasets2Darknet#detection-task
+- List of other datasets: https://github.com/AlexeyAB/darknet/tree/master/scripts#datasets

 ### Improvements in this repository

-* developed State-of-the-Art object detector YOLOv4
-* added State-of-Art models: CSP, PRN, EfficientNet
-* added layers: [conv_lstm], [scale_channels] SE/ASFF/BiFPN, [local_avgpool], [sam], [Gaussian_yolo], [reorg3d] (fixed [reorg]), fixed [batchnorm]
-* added the ability for training recurrent models (with layers conv-lstm`[conv_lstm]`/conv-rnn`[crnn]`) for accurate 
detection on video -* added data augmentation: `[net] mixup=1 cutmix=1 mosaic=1 blur=1`. Added activations: SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX -* added the ability for training with GPU-processing using CPU-RAM to increase the mini_batch_size and increase accuracy (instead of batch-norm sync) -* improved binary neural network performance **2x-4x times** for Detection on CPU and GPU if you trained your own weights by using this XNOR-net model (bit-1 inference) : https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov3-tiny_xnor.cfg -* improved neural network performance **~7%** by fusing 2 layers into 1: Convolutional + Batch-norm -* improved performance: Detection **2x times**, on GPU Volta/Turing (Tesla V100, GeForce RTX, ...) using Tensor Cores if `CUDNN_HALF` defined in the `Makefile` or `darknet.sln` -* improved performance **~1.2x** times on FullHD, **~2x** times on 4K, for detection on the video (file/stream) using `darknet detector demo`... -* improved performance **3.5 X times** of data augmentation for training (using OpenCV SSE/AVX functions instead of hand-written functions) - removes bottleneck for training on multi-GPU or GPU Volta -* improved performance of detection and training on Intel CPU with AVX (Yolo v3 **~85%**) -* optimized memory allocation during network resizing when `random=1` -* optimized GPU initialization for detection - we use batch=1 initially instead of re-init with batch=1 -* added correct calculation of **mAP, F1, IoU, Precision-Recall** using command `darknet detector map`... -* added drawing of chart of average-Loss and accuracy-mAP (`-map` flag) during training -* run `./darknet detector demo ... -json_port 8070 -mjpeg_port 8090` as JSON and MJPEG server to get results online over the network by using your soft or Web-browser -* added calculation of anchors for training -* added example of Detection and Tracking objects: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp -* run-time tips and warnings if you use incorrect cfg-file or dataset -* added support for Windows -* many other fixes of code... +- developed State-of-the-Art object detector YOLOv4 +- added State-of-Art models: CSP, PRN, EfficientNet +- added layers: [conv_lstm], [scale_channels] SE/ASFF/BiFPN, [local_avgpool], [sam], [Gaussian_yolo], [reorg3d] (fixed [reorg]), fixed [batchnorm] +- added the ability for training recurrent models (with layers conv-lstm`[conv_lstm]`/conv-rnn`[crnn]`) for accurate detection on video +- added data augmentation: `[net] mixup=1 cutmix=1 mosaic=1 blur=1`. Added activations: SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX +- added the ability for training with GPU-processing using CPU-RAM to increase the mini_batch_size and increase accuracy (instead of batch-norm sync) +- improved binary neural network performance **2x-4x times** for Detection on CPU and GPU if you trained your own weights by using this XNOR-net model (bit-1 inference) : https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov3-tiny_xnor.cfg +- improved neural network performance **~7%** by fusing 2 layers into 1: Convolutional + Batch-norm +- improved performance: Detection **2x times**, on GPU Volta/Turing (Tesla V100, GeForce RTX, ...) using Tensor Cores if `CUDNN_HALF` defined in the `Makefile` or `darknet.sln` +- improved performance **~1.2x** times on FullHD, **~2x** times on 4K, for detection on the video (file/stream) using `darknet detector demo`... 
+- improved performance **3.5 X times** of data augmentation for training (using OpenCV SSE/AVX functions instead of hand-written functions) - removes bottleneck for training on multi-GPU or GPU Volta +- improved performance of detection and training on Intel CPU with AVX (Yolo v3 **~85%**) +- optimized memory allocation during network resizing when `random=1` +- optimized GPU initialization for detection - we use batch=1 initially instead of re-init with batch=1 +- added correct calculation of **mAP, F1, IoU, Precision-Recall** using command `darknet detector map`... +- added drawing of chart of average-Loss and accuracy-mAP (`-map` flag) during training +- run `./darknet detector demo ... -json_port 8070 -mjpeg_port 8090` as JSON and MJPEG server to get results online over the network by using your soft or Web-browser +- added calculation of anchors for training +- added example of Detection and Tracking objects: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp +- run-time tips and warnings if you use incorrect cfg-file or dataset +- added support for Windows +- many other fixes of code... And added manual - [How to train Yolo v4-v2 (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) @@ -277,77 +276,78 @@ Also, you might be interested in using a simplified repository where is implemen On Linux use `./darknet` instead of `darknet.exe`, like this:`./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` -On Linux find executable file `./darknet` in the root directory, while on Windows find it in the directory `\build\darknet\x64` - -* Yolo v4 COCO - **image**: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25` -* **Output coordinates** of objects: `darknet.exe detector test cfg/coco.data yolov4.cfg yolov4.weights -ext_output dog.jpg` -* Yolo v4 COCO - **video**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output test.mp4` -* Yolo v4 COCO - **WebCam 0**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -c 0` -* Yolo v4 COCO for **net-videocam** - Smart WebCam: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg` -* Yolo v4 - **save result videofile res.avi**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -out_filename res.avi` -* Yolo v3 **Tiny** COCO - video: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4` -* **JSON and MJPEG server** that allows multiple connections from your soft or Web-browser `ip-address:8070` and 8090: `./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -json_port 8070 -mjpeg_port 8090 -ext_output` -* Yolo v3 Tiny **on GPU #1**: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights -i 1 test.mp4` -* Alternative method Yolo v3 COCO - image: `darknet.exe detect cfg/yolov4.cfg yolov4.weights -i 0 -thresh 0.25` -* Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): +On Linux find executable file `./darknet` in the root directory, while on Windows find it in the directory `\build\darknet\x64` + +- Yolo v4 COCO - **image**: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25` +- **Output coordinates** of objects: `darknet.exe detector test 
cfg/coco.data yolov4.cfg yolov4.weights -ext_output dog.jpg`
+- Yolo v4 COCO - **video**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output test.mp4`
+- Yolo v4 COCO - **WebCam 0**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -c 0`
+- Yolo v4 COCO for **net-videocam** - Smart WebCam: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg`
+- Yolo v4 - **save result videofile res.avi**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -out_filename res.avi`
+- Yolo v3 **Tiny** COCO - video: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4`
+- **JSON and MJPEG server** that allows multiple connections from your software or web browser at `ip-address:8070` and 8090: `./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -json_port 8070 -mjpeg_port 8090 -ext_output`
+- Yolo v3 Tiny **on GPU #1**: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights -i 1 test.mp4`
+- Alternative method Yolo v3 COCO - image: `darknet.exe detect cfg/yolov4.cfg yolov4.weights -i 0 -thresh 0.25`
+- Train on **Amazon EC2** and watch the mAP & loss chart via a URL like `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in Chrome/Firefox (**Darknet should be compiled with OpenCV**):
 `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map`
+- 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data cfg/yolo9000.cfg yolo9000.weights`
+- Remember to put data/9k.tree and data/coco9k.map in the same folder as your app if you use the C++ API to build an app
+- To process a list of images `data/train.txt` and save results of detection to `result.json` file use:
 `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt`
+- To process a list of images `data/train.txt` and save results of detection to `result.txt` use:
 `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt`
+- Pseudo-labelling - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use:
 `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt`
-* To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416`
-* To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights`
-* To check accuracy mAP@IoU=75: `darknet.exe detector map data/obj.data yolo-obj.cfg 
backup\yolo-obj_7000.weights -iou_thresh 0.75`
+- To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416`
+- To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights`
+- To check accuracy mAP@IoU=75: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights -iou_thresh 0.75`

 ##### For using network video-camera mjpeg-stream with any Android smartphone

 1. Download mjpeg-stream software for your Android phone: IP Webcam / Smart WebCam
-    * Smart WebCam - preferably: https://play.google.com/store/apps/details?id=com.acontech.android.SmartWebCam2
-    * IP Webcam: https://play.google.com/store/apps/details?id=com.pas.webcam
+    - Smart WebCam - preferably: https://play.google.com/store/apps/details?id=com.acontech.android.SmartWebCam2
+    - IP Webcam: https://play.google.com/store/apps/details?id=com.pas.webcam

 2. Connect your Android phone to computer by WiFi (through a WiFi-router) or USB
 3. Start Smart WebCam on your phone
 4. Replace the address below with the one shown in the phone application (Smart WebCam) and launch:

-* Yolo v4 COCO-model: `darknet.exe detector demo data/coco.data yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0`
+- Yolo v4 COCO-model: `darknet.exe detector demo data/coco.data yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0`

 ### How to compile on Linux/macOS (using `CMake`)

 The `CMakeLists.txt` will attempt to find installed optional dependencies like CUDA, cudnn, ZED and build against those. It will also create a shared object library file to use `darknet` for code development.

-Install powershell if you do not already have it ([guide here](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell)).
+To update CMake on Ubuntu, it's better to follow the guides here: https://apt.kitware.com/ or https://cmake.org/download/

-To update CMake on Ubuntu, it's better to follow guide here: https://apt.kitware.com/
-
-### Using `vcpkg`
-
-Open a shell and type these commands
-
-```PowerShell
-PS Code/> git clone https://github.com/AlexeyAB/darknet
-PS Code/> cd darknet
-PS Code/darknet> ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN
+```bash
+git clone https://github.com/AlexeyAB/darknet
+cd darknet
+mkdir build_release
+cd build_release
+cmake ..
+cmake --build . --target install --parallel 8
 ```

-(add option `-EnableOPENCV_CUDA` if you want to build OpenCV with CUDA support - very slow to build!)
-If you open the `build.ps1` script at the beginning you will find all available switches.
+### Using also PowerShell

-### Using libraries manually provided
+Install `CMake`, `CUDA` and `cuDNN` first ([how to install dependencies](#requirements-for-windows-linux-and-macos))

-Open a shell and type these commands
+Install PowerShell for your OS (Linux or macOS) ([guide here](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell)).
+
+Open PowerShell and type these commands:

 ```PowerShell
-PS Code/> git clone https://github.com/AlexeyAB/darknet
-PS Code/> cd darknet
-PS Code/darknet> ./build.ps1 -EnableOPENCV -EnableCUDA -EnableCUDNN
+git clone https://github.com/AlexeyAB/darknet
+cd darknet
+./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN
 ```

-(remove options like `-EnableCUDA` or `-EnableCUDNN` if you are not interested into). 
+- remove options like `-EnableCUDA` or `-EnableCUDNN` if you are not interested in them
+- remove option `-UseVCPKG` if you plan to manually provide the OpenCV library to darknet or if you do not want to enable OpenCV integration
+- add option `-EnableOPENCV_CUDA` if you want to build OpenCV with CUDA support - very slow to build! (requires `-UseVCPKG`)
+
 If you open the `build.ps1` script at the beginning you will find all available switches.

### How to compile on Linux (using `make`)

Just do `make` in the darknet directory. (You can try to compile and run it on Google Colab in cloud [link](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) (press «Open in Playground» button at the top-left corner) and watch the video [link](https://www.youtube.com/watch?v=mKAEGSxwOAY) )
Before make, you can set such options in the `Makefile`: [link](https://github.com/AlexeyAB/darknet/blob/9c1b9a2cf6363546c152251be578a21f3c3caec6/Makefile#L1)
-* `GPU=1` to build with CUDA to accelerate by using GPU (CUDA should be in `/usr/local/cuda`)
-* `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`)
-* `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x
-* `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams
-* `DEBUG=1` to build debug version of Yolo
-* `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU
-* `LIBSO=1` to build a library `darknet.so` and binary runnable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp
+- `GPU=1` to build with CUDA to accelerate by using GPU (CUDA should be in `/usr/local/cuda`)
+- `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`)
+- `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x
+- `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams
+- `DEBUG=1` to build debug version of Yolo
+- `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU
+- `LIBSO=1` to build a library `darknet.so` and binary runnable file `uselib` that uses this library. Or you can try to run it like so: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4`. To see how to use this SO-library from your own code, look at the C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp
 or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4`
-* `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run
+- `ZED_CAMERA=1` to build a library with ZED-3D-camera support (the ZED SDK should be installed), then run
 `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera`
-* You also need to specify for which graphics card the code is generated. This is done by setting `ARCH=`. 
If you use a never version than CUDA 11 you further need to edit line 20 from Makefile and remove `-gencode arch=compute_30,code=sm_30 \` as Kepler GPU support was dropped in CUDA 11. You can also drop the general `ARCH=` and just uncomment `ARCH=` for your graphics card.
+- You also need to specify for which graphics card the code is generated. This is done by setting `ARCH=`. If you use a newer version than CUDA 11 you further need to edit line 20 of the Makefile and remove `-gencode arch=compute_30,code=sm_30 \` as Kepler GPU support was dropped in CUDA 11. You can also drop the general `ARCH=` and just uncomment the `ARCH=` for your graphics card.

To run Darknet on Linux use examples from this article, just use `./darknet` instead of `darknet.exe`, i.e. use this command: `./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights`

@@ -373,37 +373,37 @@ To run Darknet on Linux use examples from this article, just use `./darknet` ins

Requires:

-* MSVC: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community
-* CMake GUI: `Windows win64-x64 Installer`https://cmake.org/download/
-* Download Darknet zip-archive with the latest commit and uncompress it: [master.zip](https://github.com/AlexeyAB/darknet/archive/master.zip)
+- MSVC: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community
+- CMake GUI: `Windows win64-x64 Installer`: https://cmake.org/download/
+- Download Darknet zip-archive with the latest commit and uncompress it: [master.zip](https://github.com/AlexeyAB/darknet/archive/master.zip)

In Windows:

-* Start (button) -> All programs -> CMake -> CMake (gui) ->
+- Start (button) -> All programs -> CMake -> CMake (gui) ->

-* [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64` -> Finish -> Generate -> Open Project ->
+- [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64` -> Finish -> Generate -> Open Project ->

-* in MS Visual Studio: Select: x64 and Release -> Build -> Build solution
+- in MS Visual Studio: Select: x64 and Release -> Build -> Build solution

-* find the executable file `darknet.exe` in the output path to the binaries you specified
+- find the executable file `darknet.exe` in the output path to the binaries you specified

![x64 and Release](https://habrastorage.org/webt/ay/ty/f-/aytyf-8bufe7q-16yoecommlwys.jpeg)

-
### How to compile on Windows (using `vcpkg`)

This is the recommended approach to build Darknet on Windows.

1. Install Visual Studio 2017 or 2019. In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com). Remember to install the English language pack, this is mandatory for vcpkg!

-2. Install CUDA (at least v10.0) enabling VS Integration during installation.
+2. Install CUDA, enabling VS Integration during installation.

3. 
Open PowerShell (Start -> All programs -> Windows PowerShell) and type these commands:

```PowerShell
-PS Code/> git clone https://github.com/AlexeyAB/darknet
-PS Code/> cd darknet
-PS Code/darknet> .\build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN
+Set-ExecutionPolicy unrestricted -Scope CurrentUser -Force
+git clone https://github.com/AlexeyAB/darknet
+cd darknet
+.\build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN
 ```

(add option `-EnableOPENCV_CUDA` if you want to build OpenCV with CUDA support - very slow to build! - or remove options like `-EnableCUDA` or `-EnableCUDNN` if you are not interested in them). If you open the `build.ps1` script at the beginning you will find all available switches.

@@ -427,23 +427,23 @@ Training Yolo v4 (and v3):

0. For training `cfg/yolov4-custom.cfg` download the pre-trained weights-file (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) )

1. Create file `yolo-obj.cfg` with the same content as in `yolov4-custom.cfg` (or copy `yolov4-custom.cfg` to `yolo-obj.cfg`) and:

-* change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3)
-* change line subdivisions to [`subdivisions=16`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4)
-* change line max_batches to (`classes*2000`, but not less than number of training images and not less than `6000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes
-* change line steps to 80% and 90% of max_batches, f.e. [`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22)
-* set network size `width=416 height=416` or any value multiple of 32: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9
-* change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers:
- * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610
- * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696
- * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783
-* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers. 
- * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603
- * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689
- * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776
-* when using [`[Gaussian_yolo]`](https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L608) layers, change [`filters=57`] filters=(classes + 9)x3 in the 3 `[convolutional]` before each `[Gaussian_yolo]` layer
- * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L604
- * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L696
- * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789
+- change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3)
+- change line subdivisions to [`subdivisions=16`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4)
+- change line max_batches to (`classes*2000`, but not less than the number of training images and not less than `6000`), e.g. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes
+- change line steps to 80% and 90% of max_batches, e.g. [`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22)
+- set network size `width=416 height=416` or any value multiple of 32: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9
+- change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers:
+  - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610
+  - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696
+  - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783
+- change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers.
+  - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603
+  - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689
+  - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776
+- when using [`[Gaussian_yolo]`](https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L608) layers, change [`filters=57`] to filters=(classes + 9)x3 in the 3 `[convolutional]` before each `[Gaussian_yolo]` layer
+  - https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L604
+  - https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L696
+  - https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789

So if `classes=1` then it should be `filters=18`. If `classes=2` then write `filters=21`. 
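+
+This arithmetic is easy to get wrong by hand. A minimal sketch (plain Python; the variable names are illustrative inputs, not part of darknet) that computes the derived cfg values for a given class count:
+
+```python
+# Sketch: compute the derived cfg values described above for a custom dataset.
+num_classes = 3          # example input: number of object classes
+num_train_images = 2000  # example input: images in your training set
+
+# max_batches: classes*2000, but not less than number of training images and not less than 6000
+max_batches = max(num_classes * 2000, num_train_images, 6000)
+steps = (int(max_batches * 0.8), int(max_batches * 0.9))  # 80% and 90% of max_batches
+
+filters_yolo = (num_classes + 5) * 3      # last [convolutional] before each [yolo] layer
+filters_gaussian = (num_classes + 9) * 3  # last [convolutional] before each [Gaussian_yolo] layer
+
+print(f"classes={num_classes} max_batches={max_batches} steps={steps[0]},{steps[1]}")
+print(f"filters={filters_yolo} ([yolo]) or filters={filters_gaussian} ([Gaussian_yolo])")
+```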
**(Do not write in the cfg-file: filters=(classes + 5)x3)**

@@ -474,19 +474,20 @@ classes=2

4. Put image-files (.jpg) of your objects in the directory `build\darknet\x64\data\obj\`

5. You should label each object on images from your dataset. Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark

-It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line:
+It will create a `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension - and put into that file the object number and object coordinates on this image, one object per line:

`<object-class> <x_center> <y_center> <width> <height>`

-   Where:
-   * `<object-class>` - integer object number from `0` to `(classes-1)`
-   * `<x_center> <y_center> <width> <height>` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]`
-   * for example: `<x> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>`
-   * attention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner)
+   Where:
+
+- `<object-class>` - integer object number from `0` to `(classes-1)`
+- `<x_center> <y_center> <width> <height>` - float values **relative** to width and height of image, each in the range `(0.0 to 1.0]`
+- for example: `<x> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>`
+- attention: `<x_center> <y_center>` - are the center of the rectangle (not the top-left corner)

For example, for `img1.jpg` a file `img1.txt` will be created, containing:

-   ```
+   ```csv
   1 0.716797 0.395833 0.216406 0.147222
   0 0.687109 0.379167 0.255469 0.158333
   1 0.420312 0.395833 0.140625 0.166667
   ```

6. Create file `train.txt` in directory `build\darknet\x64\data\`, with filenames of your images, each filename in new line, with path relative to `darknet.exe`, for example containing:

-   ```
+   ```csv
   data/obj/img1.jpg
   data/obj/img2.jpg
   data/obj/img3.jpg
   ```
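+
+Step 6 can be scripted for large datasets. A minimal sketch (assuming the directory layout used in this guide, `build/darknet/x64/data/obj/`; adjust the paths to your setup):
+
+```python
+# Sketch: generate train.txt with one image path per line, relative to darknet.exe.
+from pathlib import Path
+
+data_dir = Path("build/darknet/x64/data")  # example location from this guide
+
+with open(data_dir / "train.txt", "w") as f:
+    for image in sorted((data_dir / "obj").glob("*.jpg")):
+        # darknet expects paths as seen from the executable, e.g. data/obj/img1.jpg
+        f.write(f"data/obj/{image.name}\n")
+```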
7. Download pre-trained weights for the convolutional layers and put them in the directory `build\darknet\x64`

- * for `yolov4.cfg`, `yolov4-custom.cfg` (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) )
- * for `yolov4-tiny.cfg`, `yolov4-tiny-3l.cfg`, `yolov4-tiny-custom.cfg` (19 MB): [yolov4-tiny.conv.29](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29)
- * for `csresnext50-panet-spp.cfg` (133 MB): [csresnext50-panet-spp.conv.112](https://drive.google.com/file/d/16yMYCLQTY_oDlCIZPfn_sab6KD3zgzGq/view?usp=sharing)
- * for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74)
- * for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): [yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing)
- * for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing)
+  - for `yolov4.cfg`, `yolov4-custom.cfg` (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) )
+  - for `yolov4-tiny.cfg`, `yolov4-tiny-3l.cfg`, `yolov4-tiny-custom.cfg` (19 MB): [yolov4-tiny.conv.29](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29)
+  - for `csresnext50-panet-spp.cfg` (133 MB): [csresnext50-panet-spp.conv.112](https://drive.google.com/file/d/16yMYCLQTY_oDlCIZPfn_sab6KD3zgzGq/view?usp=sharing)
+  - for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74)
+  - for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): [yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing)
+  - for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing)
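+
+If you prefer a scripted setup, the download in step 7 can also be done with a small sketch using only the Python standard library (the URL is the yolov4.conv.137 release link from the list above):
+
+```python
+# Sketch: fetch the pre-trained convolutional weights listed above.
+import urllib.request
+
+url = ("https://github.com/AlexeyAB/darknet/releases/download/"
+       "darknet_yolo_v3_optimal/yolov4.conv.137")
+urllib.request.urlretrieve(url, "yolov4.conv.137")  # save next to darknet.exe
+print("downloaded yolov4.conv.137")
+```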
8. Start training by using the command line: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137`

   To train on Linux use the command: `./darknet detector train data/obj.data yolo-obj.cfg yolov4.conv.137` (just use `./darknet` instead of `darknet.exe`)

- * (file `yolo-obj_last.weights` will be saved to the `build\darknet\x64\backup\` for each 100 iterations)
- * (file `yolo-obj_xxxx.weights` will be saved to the `build\darknet\x64\backup\` for each 1000 iterations)
- * (to disable Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on computer without monitor like a cloud Amazon EC2)
- * (to see the mAP & Loss-chart during training on remote server without GUI, use command `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` then open URL `http://ip-address:8090` in Chrome/Firefox browser)
+  - (file `yolo-obj_last.weights` will be saved to `build\darknet\x64\backup\` every 100 iterations)
+  - (file `yolo-obj_xxxx.weights` will be saved to `build\darknet\x64\backup\` every 1000 iterations)
+  - (to disable the Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on a computer without a monitor, like a cloud Amazon EC2 instance)
+  - (to see the mAP & Loss-chart during training on a remote server without GUI, use the command `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` then open URL `http://ip-address:8090` in a Chrome/Firefox browser)

8.1. For training with mAP (mean average precision) calculation every 4 epochs, set `valid=valid.txt` or `train.txt` in the `obj.data` file and run: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map`

9. After training is complete - get the result `yolo-obj_final.weights` from path `build\darknet\x64\backup\`

- * After each 100 iterations you can stop and later start training from this point. For example, after 2000 iterations you can stop training, and later just start training using: `darknet.exe detector train data/obj.data yolo-obj.cfg backup\yolo-obj_2000.weights`
+  - After each 100 iterations you can stop and later start training from this point. For example, after 2000 iterations you can stop training, and later just start training using: `darknet.exe detector train data/obj.data yolo-obj.cfg backup\yolo-obj_2000.weights`

   (in the original repository https://github.com/pjreddie/darknet the weights-file is saved only once every 10 000 iterations `if(iterations > 1000)`)

- * Also you can get result earlier than all 45000 iterations.
+  - You can also get a result earlier than after all 45000 iterations.

**Note:** If during training you see `nan` values for the `avg` (loss) field - then training goes wrong, but if `nan` is in some other lines - then training goes well.

**Note:** if error `Out of memory` occurs then in the `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4)
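+
+The `nan` divergence check from the notes above can be automated instead of watching the console. A minimal log-scanning sketch (assuming you redirected training output to a file, e.g. `darknet.exe detector train ... -dont_show > train.log`; the parsed `<iteration>: <loss>, <avg loss> avg, ...` line format is the one shown in the next section):
+
+```python
+# Sketch: scan a darknet training log for average loss and nan divergence.
+import re
+
+# matches lines like: "9002: 0.211667, 0.60730 avg, 0.001000 rate, ..."
+line_re = re.compile(r"^\s*(\d+):\s*(-?[\d.]+|nan|-nan),\s*(-?[\d.]+|nan|-nan)\s+avg")
+
+with open("train.log") as f:
+    for line in f:
+        m = line_re.match(line)
+        if not m:
+            continue
+        iteration, avg_loss = int(m.group(1)), m.group(3)
+        if "nan" in avg_loss:
+            print(f"iteration {iteration}: avg loss is nan - training went wrong")
+            break
+        print(f"iteration {iteration}: avg loss {float(avg_loss):.6f}")
+```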
-### How to train tiny-yolo (to detect your custom objects):
+### How to train tiny-yolo (to detect your custom objects)

Do all the same steps as for the full yolo model as described above. With the exception of:

-* Download file with the first 29-convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29
+- Download the file with the first 29 convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29
 (Or get this file from the yolov4-tiny.weights file by using the command: `darknet.exe partial cfg/yolov4-tiny-custom.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29`)
-* Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg`
-* Start training: `darknet.exe detector train data/obj.data yolov4-tiny-obj.cfg yolov4-tiny.conv.29`
+- Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg`
+- Start training: `darknet.exe detector train data/obj.data yolov4-tiny-obj.cfg yolov4-tiny.conv.29`

For training Yolo based on other models ([DenseNet201-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/densenet201_yolo.cfg) or [ResNet50-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/resnet50_yolo.cfg)), you can download and get pre-trained weights as shown in this file: https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/partial.cmd
If you made your custom model that isn't based on other models, then you can train it without pre-trained weights, in which case random initial weights will be used.

-## When should I stop training:
+## When should I stop training

Usually 2000 iterations are sufficient for each class (object), but not less than the number of training images and not less than 6000 iterations in total. But for a more precise definition of when you should stop training, use the following manual:

@@ -559,18 +560,18 @@ Usually sufficient 2000 iterations for each class(object), but not less than num

> **9002**: 0.211667, **0.60730 avg**, 0.001000 rate, 3.868000 seconds, 576128 images
> Loaded: 0.000000 seconds

- * **9002** - iteration number (number of batch)
- * **0.60730 avg** - average loss (error) - **the lower, the better**
+- **9002** - iteration number (number of batch)
+- **0.60730 avg** - average loss (error) - **the lower, the better**

When you see that the average loss **0.xxxxxx avg** no longer decreases over many iterations then you should stop training. The final average loss can be from `0.05` (for a small model and an easy dataset) to `3.0` (for a big model and a difficult dataset).

- Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases.
+ Or if you train with the flag `-map` then you will see the mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases.

2. Once training is stopped, you should take some of the last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them:

For example, you stopped training after 9000 iterations, but the best result may come from one of the previous weights (7000, 8000, 9000). It can happen due to over-fitting. **Over-fitting** - is the case when you can detect objects on images from the training dataset, but can't detect objects on any other images. 
You should get weights from **Early Stopping Point**:

-![Over-fitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png)
+![Over-fitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png)

To get weights from Early Stopping Point:

@@ -580,9 +581,9 @@ To get weights from Early Stopping Point:

(If you use another GitHub repository, then use `darknet.exe detector recall`... instead of `darknet.exe detector map`...)

-* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights`
-* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights`
-* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights`
+- `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights`
+- `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights`
+- `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights`

And compare the last output lines for each weights-file (7000, 8000, 9000):

Choose the weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union)

For example, if the **biggest mAP** comes from `yolo-obj_8000.weights` - then **use these weights for detection**.

-Or just train with `-map` flag: 
+Or just train with `-map` flag:

-`darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` 
+`darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map`

So you will see the mAP-chart (red line) in the Loss-chart Window. mAP will be calculated every 4 epochs using the `valid=valid.txt` file that is specified in the `obj.data` file (`1 Epoch = images_in_train_txt / batch` iterations)

@@ -602,9 +603,9 @@ So you will see mAP-chart (red-line) in the Loss-chart Window. mAP will be calcu

Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights`

-* **IoU** (intersect over union) - average intersect over union of objects and detections for a certain threshold = 0.24
+- **IoU** (intersect over union) - average intersect over union of objects and detections for a certain threshold = 0.24

-* **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf
+- **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is the average value of 11 points on the PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf

**mAP** is the default precision metric in the PascalVOC competition, **this is the same as the AP50** metric in the MS COCO competition. In terms of Wiki, the indicators Precision and Recall have a slightly different meaning than in the PascalVOC competition, but **IoU always has the same meaning**.
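+
+To make the **IoU** definition above concrete, here is a minimal sketch (the boxes are illustrative `(left, top, width, height)` tuples in pixels; this is plain Python, not a darknet API):
+
+```python
+# Sketch: intersection over union of two axis-aligned boxes (left, top, width, height).
+def iou(box_a, box_b):
+    ax, ay, aw, ah = box_a
+    bx, by, bw, bh = box_b
+    inter_w = max(0.0, min(ax + aw, bx + bw) - max(ax, bx))
+    inter_h = max(0.0, min(ay + ah, by + bh) - max(ay, by))
+    inter = inter_w * inter_h
+    union = aw * ah + bw * bh - inter
+    return inter / union if union > 0 else 0.0
+
+print(iou((100, 100, 50, 50), (110, 110, 50, 50)))  # two overlapping boxes -> ~0.47
+```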
@@ -622,60 +623,60 @@

1. Before training:

-* set flag `random=1` in your `.cfg`-file - it will increase precision by training Yolo for different resolutions: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L788)
+- set flag `random=1` in your `.cfg`-file - it will increase precision by training Yolo for different resolutions: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L788)

-* increase network resolution in your `.cfg`-file (`height=608`, `width=608` or any value multiple of 32) - it will increase precision
+- increase network resolution in your `.cfg`-file (`height=608`, `width=608` or any value multiple of 32) - it will increase precision

-* check that each object that you want to detect is mandatory labeled in your dataset - no one object in your data set should not be without label. In the most training issues - there are wrong labels in your dataset (got labels by using some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark
+- check that each object you want to detect is actually labeled in your dataset - no object in your dataset should be left without a label. Most training issues come from wrong labels in your dataset (labels produced by some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark

-* my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong.
+- my Loss is very high and mAP is very low, is training wrong? Run training with the `-show_imgs` flag at the end of the training command. Do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If not - your training dataset is wrong.

-* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at different: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more

+- for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. 
It is desirable that your training dataset includes images with objects at different scales, rotations, lightings, from different sides, and on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more

-* desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects
+- it is desirable that your training dataset includes images with non-labeled objects that you do not want to detect - negative samples without a bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects

-* What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark as you like - how would you like it to be detected.
+- What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark as you like - however you would like it to be detected.

-* for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0,0615234375*(width*height)` where are width and height are parameters from `[net]` section in cfg-file)
+- for training with a large number of objects in each image, add the parameter `max=200` or a higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0.0615234375*(width*height)`, where width and height are parameters from the `[net]` section in the cfg-file)

-* for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = 23` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L895
- * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L892
- * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L989
+- for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = 23` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L895
+  - set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L892
+  - set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L989

-* for training for both small and large objects use modified models:
- * Full-model: 5 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3_5l.cfg
- * Tiny-model: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny_3l.cfg
- * YOLOv4: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-custom.cfg
+- for training for both small and large objects use modified models:
+  - Full-model: 5 yolo layers: 
https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3_5l.cfg
+  - Tiny-model: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny_3l.cfg
+  - YOLOv4: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-custom.cfg

-* If you train the model to distinguish Left and Right objects as separate classes (left/right hand, left/right-turn on road signs, ...) then for disabling flip data augmentation - add `flip=0` here: https://github.com/AlexeyAB/darknet/blob/3d2d0a7c98dbc8923d9ff705b81ff4f7940ea6ff/cfg/yolov3.cfg#L17
+- If you train the model to distinguish Left and Right objects as separate classes (left/right hand, left/right-turn on road signs, ...) then to disable flip data augmentation add `flip=0` here: https://github.com/AlexeyAB/darknet/blob/3d2d0a7c98dbc8923d9ff705b81ff4f7940ea6ff/cfg/yolov3.cfg#L17

-* General rule - your training dataset should include such a set of relative sizes of objects that you want to detect:
- * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width`
- * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height`
+- General rule - your training dataset should include the set of relative sizes of objects that you want to detect:
+  - `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width`
+  - `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height`

I.e. for each object from the Test dataset there must be at least 1 object in the Training dataset with the same class_id and about the same relative size:

- `object width in percent from Training dataset` ~= `object width in percent from Test dataset`
+ `object width in percent from Training dataset` ~= `object width in percent from Test dataset`

That is, if only objects that occupied 80-90% of the image were present in the training set, then the trained network will not be able to detect objects that occupy 1-10% of the image.

-* to speedup training (with decreasing detection accuracy) set param `stopbackward=1` for layer-136 in cfg-file
+- to speed up training (at the cost of some detection accuracy) set param `stopbackward=1` for layer-136 in the cfg-file

-* each: `model of object, side, illumination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used.
+- each: `model of object, side, illumination, scale, each 30 degrees` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex a network model should be used.

-* to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5.
+- to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train; it will increase mAP@0.9, but decrease mAP@0.5.
-* Only if you are an **expert** in neural detection networks - recalculate anchors for your dataset for `width` and `height` from cfg-file:
`darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416`
then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file. But you should change indexes of anchors `masks=` for each [yolo]-layer, so for YOLOv4 the 1st-[yolo]-layer has anchors smaller than 30x30, 2nd smaller than 60x60, 3rd remaining, and vice versa for YOLOv3. Also you should change the `filters=(classes + 5)*<number of mask>` before each [yolo]-layer. If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors.
+- Only if you are an **expert** in neural detection networks - recalculate the anchors for your dataset for the `width` and `height` from your cfg-file:
`darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416`
then set the same 9 `anchors` in each of the 3 `[yolo]`-layers in your cfg-file. But you should change the indexes of anchors `masks=` for each [yolo]-layer, so for YOLOv4 the 1st [yolo]-layer has anchors smaller than 30x30, the 2nd smaller than 60x60, the 3rd the remaining ones, and vice versa for YOLOv3. Also you should change the `filters=(classes + 5)*<number of mask>` before each [yolo]-layer. If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors.

2. After training - for detection:

-* Increase network-resolution by set in your `.cfg`-file (`height=608` and `width=608`) or (`height=832` and `width=832`) or (any value multiple of 32) - this increases the precision and makes it possible to detect small objects: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9)
+- Increase network-resolution by setting in your `.cfg`-file (`height=608` and `width=608`) or (`height=832` and `width=832`) or (any value multiple of 32) - this increases the precision and makes it possible to detect small objects: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9)

-* it is not necessary to train the network again, just use `.weights`-file already trained for 416x416 resolution
+- it is not necessary to train the network again, just use the `.weights`-file already trained for 416x416 resolution

-* to get even greater accuracy you should train with higher resolution 608x608 or 832x832, note: if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4)
+- to get even greater accuracy you should train with a higher resolution, 608x608 or 832x832; note: if error `Out of memory` occurs then in the `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4)

## How to mark bounded boxes of objects and create annotation files

@@ -697,40 +698,40 @@ Different tools for marking objects in images:

## How to use Yolo as DLL and SO libraries

-* on Linux
- * using `build.sh` or
- * build `darknet` using `cmake` or
- * set `LIBSO=1` in the `Makefile` and do `make`
-* on Windows
- * using `build.ps1` or
- * build `darknet` using `cmake` or
- * compile `build\darknet\yolo_cpp_dll.sln` solution or `build\darknet\yolo_cpp_dll_no_gpu.sln` solution
+- on Linux
+  - using `build.sh` or
+  - build `darknet` using `cmake` or
+  - set `LIBSO=1` in the `Makefile` and do `make`
+- on Windows
+  - using `build.ps1` or
+  - build `darknet` using `cmake` or
+  - compile `build\darknet\yolo_cpp_dll.sln` solution or `build\darknet\yolo_cpp_dll_no_gpu.sln` solution

There are 2 APIs:

-* C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h
- * Python examples using the C API:
- * https://github.com/AlexeyAB/darknet/blob/master/darknet.py
- * https://github.com/AlexeyAB/darknet/blob/master/darknet_video.py
+- C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h
+  - 
Python examples using the C API:
+    - https://github.com/AlexeyAB/darknet/blob/master/darknet.py
+    - https://github.com/AlexeyAB/darknet/blob/master/darknet_video.py

-* C++ API: https://github.com/AlexeyAB/darknet/blob/master/include/yolo_v2_class.hpp
- * C++ example that uses C++ API: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp
+- C++ API: https://github.com/AlexeyAB/darknet/blob/master/include/yolo_v2_class.hpp
+  - C++ example that uses C++ API: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp

----

1. To compile Yolo as C++ DLL-file `yolo_cpp_dll.dll` - open the solution `build\darknet\yolo_cpp_dll.sln`, set **x64** and **Release**, and do: Build -> Build yolo_cpp_dll
- * You should have installed **CUDA 10.0**
- * To use cuDNN do: (right click on project) -> properties -> C/C++ -> Preprocessor -> Preprocessor Definitions, and add at the beginning of line: `CUDNN;`
+  - You should have installed **CUDA 10.2**
+  - To use cuDNN do: (right click on project) -> properties -> C/C++ -> Preprocessor -> Preprocessor Definitions, and add at the beginning of the line: `CUDNN;`

2. To use Yolo as DLL-file in your C++ console application - open the solution `build\darknet\yolo_console_dll.sln`, set **x64** and **Release**, and do: Build -> Build yolo_console_dll

- * you can run your console application from Windows Explorer `build\darknet\x64\yolo_console_dll.exe`
+  - you can run your console application from Windows Explorer `build\darknet\x64\yolo_console_dll.exe`

    **use this command**: `yolo_console_dll.exe data/coco.names yolov4.cfg yolov4.weights test.mp4`

- * after launching your console application and entering the image file name - you will see info for each object:
    `<obj_id> <left_x> <top_y> <width> <height> <probability>`
+  - after launching your console application and entering the image file name - you will see info for each object:
    `<obj_id> <left_x> <top_y> <width> <height> <probability>`
- * to use simple OpenCV-GUI you should uncomment line `//#define OPENCV` in `yolo_console_dll.cpp`-file: [link](https://github.com/AlexeyAB/darknet/blob/a6cbaeecde40f91ddc3ea09aa26a03ab5bbf8ba8/src/yolo_console_dll.cpp#L5)
+  - to use simple OpenCV-GUI you should uncomment the line `//#define OPENCV` in the `yolo_console_dll.cpp`-file: [link](https://github.com/AlexeyAB/darknet/blob/a6cbaeecde40f91ddc3ea09aa26a03ab5bbf8ba8/src/yolo_console_dll.cpp#L5)
- * you can see source code of simple example for detection on the video file: [link](https://github.com/AlexeyAB/darknet/blob/ab1c5f9e57b4175f29a6ef39e7e68987d3e98704/src/yolo_console_dll.cpp#L75)
+  - you can see the source code of a simple example for detection on a video file: [link](https://github.com/AlexeyAB/darknet/blob/ab1c5f9e57b4175f29a6ef39e7e68987d3e98704/src/yolo_console_dll.cpp#L75)

`yolo_cpp_dll.dll`-API: [link](https://github.com/AlexeyAB/darknet/blob/master/src/yolo_v2_class.hpp#L42)

diff --git a/build.ps1 b/build.ps1
index 61e70425783..86c9acf4ba3 100755
--- a/build.ps1
+++ b/build.ps1
@@ -18,11 +18,17 @@ param (
   [switch]$ForceVCPKGCacheRemoval = $false,
   [switch]$ForceSetupVS = $false,
   [Int32]$ForceGCCVersion = 0,
+  [Int32]$ForceOpenCVVersion = 0,
   [Int32]$NumberOfBuildWorkers = 8,
   [string]$AdditionalBuildSetup = ""  # "-DCMAKE_CUDA_ARCHITECTURES=30"
 )

-$build_ps1_version = "0.9.3"
+$build_ps1_version = "0.9.4"
+
+$ErrorActionPreference = "SilentlyContinue"
+Stop-Transcript | out-null
+$ErrorActionPreference = "Continue"
+Start-Transcript -Path $PSScriptRoot/build.log

 Function MyThrow ($Message) {
   if ($DisableInteractive) {
@@ -389,6 +395,7 @@ function getLatestVisualStudioWithDesktopWorkloadVersion() {
   return 
$installationVersion } +$vcpkg_root_set_by_this_script = $false if ((Test-Path env:VCPKG_ROOT) -and $UseVCPKG) { $vcpkg_path = "$env:VCPKG_ROOT" @@ -398,6 +405,7 @@ if ((Test-Path env:VCPKG_ROOT) -and $UseVCPKG) { elseif ((Test-Path "${env:WORKSPACE}/vcpkg") -and $UseVCPKG) { $vcpkg_path = "${env:WORKSPACE}/vcpkg" $env:VCPKG_ROOT = "${env:WORKSPACE}/vcpkg" + $vcpkg_root_set_by_this_script = $true Write-Host "Found vcpkg in WORKSPACE/vcpkg: $vcpkg_path" $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } @@ -405,6 +413,7 @@ elseif (-not($null -eq ${RUNVCPKG_VCPKG_ROOT_OUT})) { if((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { $vcpkg_path = "${RUNVCPKG_VCPKG_ROOT_OUT}" $env:VCPKG_ROOT = "${RUNVCPKG_VCPKG_ROOT_OUT}" + $vcpkg_root_set_by_this_script = $true Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${vcpkg_path}" $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } @@ -421,6 +430,7 @@ elseif ($UseVCPKG) { } $vcpkg_path = "$PWD/vcpkg" $env:VCPKG_ROOT = "$PWD/vcpkg" + $vcpkg_root_set_by_this_script = $true Write-Host "Found vcpkg in $PWD/vcpkg: $PWD/vcpkg" $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } @@ -464,6 +474,16 @@ if ($ForceVCPKGCacheRemoval -and (-Not $UseVCPKG)) { Write-Host "VCPKG is not enabled, so local vcpkg binary cache will not be deleted even if requested" -ForegroundColor Yellow } +if (($ForceOpenCVVersion -eq 2) -and $UseVCPKG) { + Write-Host "You requested OpenCV version 2, so vcpkg will install that version" -ForegroundColor Yellow + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_USE_OPENCV4=OFF -DVCPKG_USE_OPENCV2=ON" +} + +if (($ForceOpenCVVersion -eq 3) -and $UseVCPKG) { + Write-Host "You requested OpenCV version 3, so vcpkg will install that version" -ForegroundColor Yellow + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_USE_OPENCV4=OFF -DVCPKG_USE_OPENCV3=ON" +} + if ($UseVCPKG -and $ForceVCPKGCacheRemoval) { if ($IsWindows -or $IsWindowsPowerShell) { $vcpkgbinarycachepath = "$env:LOCALAPPDATA/vcpkg/archive" @@ -608,3 +628,11 @@ Set-Location .. Copy-Item cmake/Modules/*.cmake share/darknet/ Write-Host "Build complete!" 
-ForegroundColor Green Pop-Location + +if ($vcpkg_root_set_by_this_script) { + $env:VCPKG_ROOT = $null +} + +$ErrorActionPreference = "SilentlyContinue" +Stop-Transcript | out-null +$ErrorActionPreference = "Continue" diff --git a/scripts/deploy-cuda.sh b/scripts/deploy-cuda.sh new file mode 100755 index 00000000000..65f173aabaf --- /dev/null +++ b/scripts/deploy-cuda.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +if [[ "$OSTYPE" == "darwin"* ]]; then + echo "Unable to deploy CUDA on macOS, please wait for a future script update" +else + if [[ $(cut -f2 <<< $(lsb_release -r)) == "18.04" ]]; then + sudo apt-get update + sudo apt-get install build-essential g++ + sudo apt-get install apt-transport-https ca-certificates gnupg software-properties-common wget + wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub + sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo apt-get update + sudo apt-get dist-upgrade -y + sudo apt-get install -y --no-install-recommends cuda-compiler-10-2 cuda-libraries-dev-10-2 cuda-driver-dev-10-2 cuda-cudart-dev-10-2 cuda-curand-dev-10-2 + sudo apt-get install -y --no-install-recommends libcudnn7-dev + sudo rm -rf /usr/local/cuda + sudo ln -s /usr/local/cuda-10.2 /usr/local/cuda + elif [[ $(cut -f2 <<< $(lsb_release -r)) == "20.04" ]]; then + sudo apt-get update + sudo apt-get install build-essential g++ + sudo apt-get install apt-transport-https ca-certificates gnupg software-properties-common wget + sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin + sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" + sudo apt-get update + sudo apt-get dist-upgrade -y + sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 + sudo apt-get install -y --no-install-recommends libcudnn8-dev + sudo rm -rf /usr/local/cuda + sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda + else + echo "Unable to deploy CUDA on this Linux version, please wait for a future script update" + fi +fi diff --git a/scripts/dice_label.sh b/scripts/dice_label.sh old mode 100644 new mode 100755 diff --git a/scripts/get_coco2017.sh b/scripts/get_coco2017.sh old mode 100644 new mode 100755 diff --git a/scripts/get_coco_dataset.sh b/scripts/get_coco_dataset.sh old mode 100644 new mode 100755 diff --git a/scripts/get_imagenet_train.sh b/scripts/get_imagenet_train.sh old mode 100644 new mode 100755 diff --git a/scripts/imagenet_label.sh b/scripts/imagenet_label.sh old mode 100644 new mode 100755 diff --git a/scripts/install_OpenCV4.sh b/scripts/install_OpenCV4.sh old mode 100644 new mode 100755 diff --git a/scripts/setup.ps1 b/scripts/setup.ps1 index c5c2ae22b21..ca54dba9754 100755 --- a/scripts/setup.ps1 +++ b/scripts/setup.ps1 @@ -1,10 
+1,11 @@ #!/usr/bin/env pwsh -$install_cuda = $false +param ( + [switch]$InstallCUDA = $false +) if ($null -eq (Get-Command "choco.exe" -ErrorAction SilentlyContinue)) { # Download and install Chocolatey - Set-ExecutionPolicy unrestricted -Scope CurrentUser Invoke-Expression ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')) Throw "Please close and re-open powershell and then re-run setup.ps1 script" } @@ -13,23 +14,26 @@ Start-Process -FilePath "choco" -Verb runAs -ArgumentList " install -y cmake nin Start-Process -FilePath "choco" -Verb runAs -ArgumentList " install -y visualstudio2019buildtools --package-parameters `"--add Microsoft.VisualStudio.Component.VC.CoreBuildTools --includeRecommended --includeOptional --passive --locale en-US --lang en-US`"" Push-Location $PSScriptRoot -if ($install_cuda) { - & ./deploy-cuda.ps1 - $features = "full" +if ($InstallCUDA) { + & $PSScriptRoot/deploy-cuda.ps1 + $env:CUDA_PATH="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + $env:CUDA_TOOLKIT_ROOT_DIR="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + $env:CUDACXX="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3\\bin\\nvcc.exe" + $CUDAisAvailable = $true } else { if (-not $null -eq $env:CUDA_PATH) { - $features = "full" + $CUDAisAvailable = $true } else{ - $features = "opencv-base" + $CUDAisAvailable = $false } } -git.exe clone https://github.com/microsoft/vcpkg ../vcpkg -Set-Location ..\vcpkg -.\bootstrap-vcpkg.bat -disableMetrics -.\vcpkg.exe install darknet[${features}]:x64-windows -Pop-Location - -Write-Host "Darknet installed in $pwd\x64-windows\tools\darknet" -ForegroundColor Yellow +if ($CUDAisAvailable) { + & $PSScriptRoot/../build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + #& $PSScriptRoot/../build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableOPENCV_CUDA -DisableInteractive -DoNotUpdateDARKNET +} +else { + & $PSScriptRoot/../build.ps1 -UseVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET +} diff --git a/scripts/setup.sh b/scripts/setup.sh index c33379e92ef..51d641bcea4 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -1,85 +1,102 @@ #!/usr/bin/env bash -## enable or disable installed components +install_tools=false +bypass_driver_installation=false -install_cuda=true +POSITIONAL=() +while [[ $# -gt 0 ]] +do +key="$1" -########################### +case $key in + -InstallCUDA|--InstallCUDA) + install_tools=true + shift + ;; + -BypassDRIVER|--BypassDRIVER) + bypass_driver_installation=true + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; +esac +done +set -- "${POSITIONAL[@]}" # restore positional parameters +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +echo "This script is located in $script_dir" +cd $script_dir/.. 
temp_folder="./temp" mkdir -p $temp_folder cd $temp_folder -sudo apt-get install cmake git ninja-build build-essential g++ - -if [ "$install_cuda" = true ] ; then +if [ "$install_tools" = true ] ; then + $script_dir/deploy-cuda.sh if [[ "$OSTYPE" == "darwin"* ]]; then - echo "Unable to provide CUDA on macOS" + echo "Unable to provide tools on macOS, please wait for a future script update or do not put -InstallCUDA command line flag to continue" else - # Download and install CUDA if [[ $(cut -f2 <<< $(lsb_release -r)) == "18.04" ]]; then - wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb - sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub - sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb - wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb - sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb - sudo apt update + sudo apt-get update + sudo apt-get install git ninja-build build-essential g++ nasm yasm + sudo apt-get install apt-transport-https ca-certificates gnupg software-properties-common wget + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null + sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' + wget -q https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb + sudo dpkg -i packages-microsoft-prod.deb + sudo add-apt-repository universe + sudo apt-get update sudo apt-get dist-upgrade -y - sudo apt-get install -y --no-install-recommends cuda-compiler-10-2 cuda-libraries-dev-10-2 cuda-driver-dev-10-2 cuda-cudart-dev-10-2 cuda-curand-dev-10-2 - sudo apt-get install -y --no-install-recommends libcudnn7-dev - sudo rm -rf /usr/local/cuda - sudo ln -s /usr/local/cuda-10.2 /usr/local/cuda + sudo apt-get install -y cmake + sudo apt-get install -y powershell + if [ "$bypass_driver_installation" = true ] ; then + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + fi export PATH=/usr/local/cuda/bin:$PATH export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH export CUDACXX=/usr/local/cuda/bin/nvcc export CUDA_PATH=/usr/local/cuda export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda - features="full" + cuda_is_available=true elif [[ $(cut -f2 <<< $(lsb_release -r)) == "20.04" ]]; then - sudo apt update + sudo apt-get update + sudo apt-get install git ninja-build build-essential g++ nasm yasm + sudo apt-get install apt-transport-https ca-certificates gnupg software-properties-common wget + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null + sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main' + wget -q https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb + sudo dpkg -i packages-microsoft-prod.deb + sudo add-apt-repository universe + sudo apt-get update sudo apt-get dist-upgrade -y - #sudo apt-get install -y --no-install-recommends nvidia-cuda-dev nvidia-cuda-toolkit - sudo wget -O 
/etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
-    sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub
-    sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
-    sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /"
-    sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2
-    sudo apt-get install -y --no-install-recommends libcudnn8-dev
-    sudo rm -rf /usr/local/cuda
-    sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda
+    sudo apt-get install -y cmake
+    sudo apt-get install -y powershell
+    if [ "$bypass_driver_installation" = true ] ; then
+      sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1
+      sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1
+      sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so
+    fi
     export PATH=/usr/local/cuda/bin:$PATH
     export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
     export CUDACXX=/usr/local/cuda/bin/nvcc
     export CUDA_PATH=/usr/local/cuda
     export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda
-    features="full"
+    cuda_is_available=true
   else
-    echo "Unable to auto-install CUDA on this Linux OS"
-    features="opencv-base"
+    echo "Unable to provide tools on this Linux version, please wait for a future script update or do not pass the -InstallCUDA command line flag to continue"
   fi
 fi
-else
-  if [[ -v CUDA_PATH ]]; then
-    features="full"
-  else
-    features="opencv-base"
-  fi
 fi
 
 cd ..
-rm -rf $temp_folder
-
-if [[ ! 
-v VCPKG_ROOT ]]; then - git clone https://github.com/microsoft/vcpkg - cd vcpkg - ./bootstrap-vcpkg.sh -disableMetrics - export VCPKG_ROOT=$(pwd) -fi - -$VCPKG_ROOT/vcpkg install darknet[${features}] +rm -rf "$temp_folder" -if [[ "$OSTYPE" == "darwin"* ]]; then - echo "Darknet installed in $VCPKG_ROOT/installed/x64-osx/tools/darknet" +if [[ -v CUDA_PATH ]]; then + ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET + #./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -EnableOPENCV_CUDA -DisableInteractive -DoNotUpdateDARKNET else - echo "Darknet installed in $VCPKG_ROOT/installed/x64-linux/tools/darknet" + ./build.ps1 -UseVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET fi From 83e377989d1a7b86249425dc1e025ad4acff7cb7 Mon Sep 17 00:00:00 2001 From: Alexey Date: Mon, 7 Jun 2021 19:52:22 +0300 Subject: [PATCH 21/46] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index cc85070b24a..337d095d44f 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,7 @@ You can get cfg-files by path: `darknet/cfg/` - [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) - [YOLOv5](https://github.com/ultralytics/yolov5) - **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ +- **nVidia Transfer Learning Toolkit (TLT>=3.0)** Training and Detection https://docs.nvidia.com/metropolis/TLT/tlt-user-guide/text/object_detection/yolo_v4.html - **TensorRT+tkDNN**: https://github.com/ceccocats/tkDNN#fps-results - **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo read [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) and [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). 
Additionally [jkjung-avt/tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos) or [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) - **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt From 7ff80b1a4762e9bf6c477e8197b7d8b330488763 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Fri, 11 Jun 2021 02:35:26 +0200 Subject: [PATCH 22/46] add missing asm tools in CI, run it every night (#7790) * add missing asm tools * run CI every night * add csharp tool --- .github/workflows/ccpp.yml | 33 +++++++++++++- .github/workflows/on_pr.yml | 27 +++++++++++ CMakeLists.txt | 5 +++ build.ps1 | 18 +++++++- src/csharp/CMakeLists.txt | 19 ++++++++ src/csharp/YoloWrapper.cs | 89 +++++++++++++++++++++++++++++++++++++ 6 files changed, 188 insertions(+), 3 deletions(-) create mode 100644 src/csharp/CMakeLists.txt create mode 100644 src/csharp/YoloWrapper.cs diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index b26e0701ac0..21ff07296fb 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -1,6 +1,10 @@ name: Darknet Continuous Integration -on: [push, workflow_dispatch] +on: + push: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' env: VCPKG_BINARY_SOURCES: 'clear;nuget,vcpkgbinarycache,readwrite' @@ -68,6 +72,11 @@ jobs: - uses: lukka/get-cmake@latest + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + - name: 'Install CUDA' run: ./scripts/deploy-cuda.sh @@ -123,6 +132,11 @@ jobs: - uses: lukka/get-cmake@latest + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + - name: 'Install CUDA' run: ./scripts/deploy-cuda.sh @@ -161,6 +175,11 @@ jobs: - uses: lukka/get-cmake@latest + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + - name: 'Install CUDA' run: ./scripts/deploy-cuda.sh @@ -513,6 +532,18 @@ jobs: run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + win-csharp: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -EnableCSharpWrapper -DisableInteractive -DoNotUpdateDARKNET + + win-intlibs-cuda: runs-on: windows-latest steps: diff --git a/.github/workflows/on_pr.yml b/.github/workflows/on_pr.yml index 42c03d696be..6b6aface5b8 100644 --- a/.github/workflows/on_pr.yml +++ b/.github/workflows/on_pr.yml @@ -68,6 +68,11 @@ jobs: - uses: lukka/get-cmake@latest + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + - name: 'Install CUDA' run: ./scripts/deploy-cuda.sh @@ -120,6 +125,11 @@ jobs: - uses: lukka/get-cmake@latest + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + - name: 'Install CUDA' run: ./scripts/deploy-cuda.sh @@ -155,6 +165,11 @@ jobs: - uses: lukka/get-cmake@latest + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + - name: 'Install CUDA' run: ./scripts/deploy-cuda.sh @@ -492,6 +507,18 @@ jobs: run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + win-csharp: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -EnableCSharpWrapper -DisableInteractive -DoNotUpdateDARKNET + + win-intlibs-cuda: runs-on: 
windows-latest steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index b3630f516dc..0e1abf32d9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,7 @@ option(ENABLE_CUDNN "Enable CUDNN" ON) option(ENABLE_CUDNN_HALF "Enable CUDNN Half precision" ON) option(ENABLE_ZED_CAMERA "Enable ZED Camera support" ON) option(ENABLE_VCPKG_INTEGRATION "Enable VCPKG integration" ON) +option(ENABLE_CSHARP_WRAPPER "Enable building a csharp wrapper" OFF) option(VCPKG_BUILD_OPENCV_WITH_CUDA "Build OpenCV with CUDA extension integration" ON) option(VCPKG_USE_OPENCV2 "Use legacy OpenCV 2" OFF) option(VCPKG_USE_OPENCV3 "Use legacy OpenCV 3" OFF) @@ -576,3 +577,7 @@ install(FILES "${PROJECT_BINARY_DIR}/DarknetConfigVersion.cmake" DESTINATION "${INSTALL_CMAKE_DIR}" ) + +if(ENABLE_CSHARP_WRAPPER) + add_subdirectory(src/csharp) +endif() diff --git a/build.ps1 b/build.ps1 index 86c9acf4ba3..d230622e9a5 100755 --- a/build.ps1 +++ b/build.ps1 @@ -17,13 +17,14 @@ param ( [switch]$ForceStaticLib = $false, [switch]$ForceVCPKGCacheRemoval = $false, [switch]$ForceSetupVS = $false, + [switch]$EnableCSharpWrapper = $false, [Int32]$ForceGCCVersion = 0, [Int32]$ForceOpenCVVersion = 0, [Int32]$NumberOfBuildWorkers = 8, [string]$AdditionalBuildSetup = "" # "-DCMAKE_CUDA_ARCHITECTURES=30" ) -$build_ps1_version = "0.9.4" +$build_ps1_version = "0.9.5" $ErrorActionPreference = "SilentlyContinue" Stop-Transcript | out-null @@ -242,6 +243,15 @@ else { Write-Host "VisualStudio integration is enabled, please pass -DoNotSetupVS to the script to disable" } +if ($EnableCSharpWrapper -and ($IsWindowsPowerShell -or $IsWindows)) { + Write-Host "Yolo C# wrapper integration is enabled. Will be built with Visual Studio generator. Disabling Ninja" + $DoNotUseNinja = $true +} +else { + $EnableCSharpWrapper = $false + Write-Host "Yolo C# wrapper integration is disabled, please pass -EnableCSharpWrapper to the script to enable. You must be on Windows!" 
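+    # Note (added comment, not part of the original commit): CMake implements
+    # the CSharp project language only for the Visual Studio generators, which
+    # is why the wrapper is gated to Windows hosts and Ninja is disabled above.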
+} + if ($DoNotUseNinja) { Write-Host "Ninja is disabled" } @@ -410,7 +420,7 @@ elseif ((Test-Path "${env:WORKSPACE}/vcpkg") -and $UseVCPKG) { $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } elseif (-not($null -eq ${RUNVCPKG_VCPKG_ROOT_OUT})) { - if((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { + if ((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { $vcpkg_path = "${RUNVCPKG_VCPKG_ROOT_OUT}" $env:VCPKG_ROOT = "${RUNVCPKG_VCPKG_ROOT_OUT}" $vcpkg_root_set_by_this_script = $true @@ -592,6 +602,10 @@ if (-Not $EnableOPENCV_CUDA) { $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_BUILD_OPENCV_WITH_CUDA:BOOL=OFF" } +if ($EnableCSharpWrapper) { + $additional_build_setup = $additional_build_setup + " -DENABLE_CSHARP_WRAPPER:BOOL=ON" +} + $build_folder = "./build_release" if (-Not $DoNotDeleteBuildFolder) { Write-Host "Removing folder $build_folder" -ForegroundColor Yellow diff --git a/src/csharp/CMakeLists.txt b/src/csharp/CMakeLists.txt new file mode 100644 index 00000000000..971725b227d --- /dev/null +++ b/src/csharp/CMakeLists.txt @@ -0,0 +1,19 @@ + +project(YoloWrapper LANGUAGES CSharp) +include(CSharpUtilities) + +add_library(${PROJECT_NAME} + ${PROJECT_NAME}.cs +) + +target_link_libraries(${PROJECT_NAME} PRIVATE dark) + +set_property(TARGET ${PROJECT_NAME} PROPERTY VS_DOTNET_REFERENCES + "System" + "System.Runtime.InteropServices" +) + +install(TARGETS ${PROJECT_NAME} + RUNTIME DESTINATION "${INSTALL_BIN_DIR}" + COMPONENT dev +) diff --git a/src/csharp/YoloWrapper.cs b/src/csharp/YoloWrapper.cs new file mode 100644 index 00000000000..52c12adb80f --- /dev/null +++ b/src/csharp/YoloWrapper.cs @@ -0,0 +1,89 @@ +using System; +using System.Runtime.InteropServices; + +namespace Darknet +{ + public class YoloWrapper : IDisposable + { + private const string YoloLibraryName = "yolo_cpp_dll.dll"; + private const int MaxObjects = 1000; + + [DllImport(YoloLibraryName, EntryPoint = "init")] + private static extern int InitializeYolo(string configurationFilename, string weightsFilename, int gpu); + + [DllImport(YoloLibraryName, EntryPoint = "detect_image")] + private static extern int DetectImage(string filename, ref BboxContainer container); + + [DllImport(YoloLibraryName, EntryPoint = "detect_mat")] + private static extern int DetectImage(IntPtr pArray, int nSize, ref BboxContainer container); + + [DllImport(YoloLibraryName, EntryPoint = "dispose")] + private static extern int DisposeYolo(); + + [StructLayout(LayoutKind.Sequential)] + public struct bbox_t + { + public UInt32 x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box + public float prob; // confidence - probability that the object was found correctly + public UInt32 obj_id; // class of object - from range [0, classes-1] + public UInt32 track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) + public UInt32 frames_counter; + public float x_3d, y_3d, z_3d; // 3-D coordinates, if there is used 3D-stereo camera + }; + + [StructLayout(LayoutKind.Sequential)] + public struct BboxContainer + { + [MarshalAs(UnmanagedType.ByValArray, SizeConst = MaxObjects)] + public bbox_t[] candidates; + } + + public YoloWrapper(string configurationFilename, string weightsFilename, int gpu) + { + InitializeYolo(configurationFilename, weightsFilename, gpu); + } + + public void Dispose() + { + DisposeYolo(); + } + + public bbox_t[] Detect(string filename) + { + var container = new BboxContainer(); + var count = DetectImage(filename, ref container); + + 
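+            // Added comment (not in the original commit): 'count' is the number
+            // of detections the native library wrote into the fixed-size
+            // 'candidates' array; entries past that index are not meaningful,
+            // so callers may want to truncate the result to 'count'.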
return container.candidates;
+        }
+
+        public bbox_t[] Detect(byte[] imageData)
+        {
+            var container = new BboxContainer();
+
+            var size = Marshal.SizeOf(imageData[0]) * imageData.Length;
+            var pnt = Marshal.AllocHGlobal(size);
+
+            try
+            {
+                // Copy the array to unmanaged memory.
+                Marshal.Copy(imageData, 0, pnt, imageData.Length);
+                var count = DetectImage(pnt, imageData.Length, ref container);
+                if (count == -1)
+                {
+                    throw new NotSupportedException($"{YoloLibraryName} has no OpenCV support");
+                }
+            }
+            catch (Exception exception)
+            {
+                return null;
+            }
+            finally
+            {
+                // Free the unmanaged memory.
+                Marshal.FreeHGlobal(pnt);
+            }
+
+            return container.candidates;
+        }
+    }
+}

From 894aada70bcc78a224b7675a19068b6353dd6fbe Mon Sep 17 00:00:00 2001
From: Ievgen Popovych
Date: Fri, 11 Jun 2021 03:36:12 +0300
Subject: [PATCH 23/46] region_layer: Populate out_h/w/c fields for consistency (#7783)

---
 src/region_layer.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/region_layer.c b/src/region_layer.c
index 7aa1a196f80..b7aba32e1a9 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -20,6 +20,10 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int
     l.batch = batch;
     l.h = h;
     l.w = w;
+    l.c = n*(classes + coords + 1);
+    l.out_w = l.w;
+    l.out_h = l.h;
+    l.out_c = l.c;
     l.classes = classes;
     l.coords = coords;
     l.cost = (float*)xcalloc(1, sizeof(float));

From 67543e27aa151dea522ff2a8b287c709c0397749 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Fri, 11 Jun 2021 03:38:20 +0300
Subject: [PATCH 24/46] fix darknet_no_gpu.vcxproj

---
 build/darknet/darknet_no_gpu.vcxproj | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/build/darknet/darknet_no_gpu.vcxproj b/build/darknet/darknet_no_gpu.vcxproj
index 72d23af3d09..fadf7289694 100644
--- a/build/darknet/darknet_no_gpu.vcxproj
+++ b/build/darknet/darknet_no_gpu.vcxproj
@@ -227,6 +227,7 @@
+
@@ -290,6 +291,7 @@
+

From 00b8e29b6802cc085e30f64b48f4a693e6019235 Mon Sep 17 00:00:00 2001
From: Alexey
Date: Sat, 12 Jun 2021 02:53:08 +0300
Subject: [PATCH 25/46] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 337d095d44f..2d1bbc52e6c 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ About Darknet framework: http://pjreddie.com/darknet/
 [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE)
 [![DOI](https://zenodo.org/badge/75388965.svg)](https://zenodo.org/badge/latestdoi/75388965)
 [![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2004.10934-B31B1B.svg)](https://arxiv.org/abs/2004.10934)
+[![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2011.08036-B31B1B.svg)](https://arxiv.org/abs/2011.08036)
 [![colab](https://user-images.githubusercontent.com/4096485/86174089-b2709f80-bb29-11ea-9faf-3d8dc668a1a5.png)](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE)
 [![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg)

From 396b5299a79f6401c0657128ec6ff6d78816d78f Mon Sep 17 00:00:00 2001
From: Double
Date: Thu, 17 Jun 2021 00:25:00 +0800
Subject: [PATCH 26/46] fix: flush buffer to weights-file when writing each layer (#7747)

Co-authored-by: Double.c
---
 src/parser.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/parser.c b/src/parser.c
index 8f8f584268e..a2d133382c4 100644
--- a/src/parser.c
+++ b/src/parser.c
@@
-2026,6 +2026,7 @@ void save_weights_upto(network net, char *filename, int cutoff, int save_ema) fwrite(l.biases, sizeof(float), l.outputs, fp); fwrite(l.weights, sizeof(float), size, fp); } + fflush(fp); } fclose(fp); } From 6c171a4ceaaf508568a90fe8f2d01014c2c62f34 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Fri, 18 Jun 2021 00:15:55 +0200 Subject: [PATCH 27/46] add automatic rebase github action (#7809) * add automatic rebase action * do not upload artifacts from PRs * use proper symbol in build.ps1 * fix lib naming * fix folder for dll artifacts --- .github/workflows/ccpp.yml | 2 +- .github/workflows/on_pr.yml | 127 ------------------ .github/workflows/rebase.yml | 19 +++ build.ps1 | 2 +- src/csharp/CMakeLists.txt | 2 +- .../{YoloWrapper.cs => YoloCSharpWrapper.cs} | 2 +- 6 files changed, 23 insertions(+), 131 deletions(-) create mode 100644 .github/workflows/rebase.yml rename src/csharp/{YoloWrapper.cs => YoloCSharpWrapper.cs} (97%) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 21ff07296fb..92fb50375b3 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -458,7 +458,7 @@ jobs: - uses: actions/upload-artifact@v2 with: name: darknet-vcpkg-${{ runner.os }} - path: ${{ runner.workspace }}/buildDirectory/Release/*.dll + path: ${{ github.workspace }}/build_release/*.dll - uses: actions/upload-artifact@v2 with: name: darknet-vcpkg-${{ runner.os }} diff --git a/.github/workflows/on_pr.yml b/.github/workflows/on_pr.yml index 6b6aface5b8..9f0a664ebcb 100644 --- a/.github/workflows/on_pr.yml +++ b/.github/workflows/on_pr.yml @@ -100,23 +100,6 @@ jobs: LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-cuda-${{ runner.os }} - path: cfg - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-cuda-${{ runner.os }} - path: data - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-cuda-${{ runner.os }} - path: ${{ github.workspace }}/*dark* - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-cuda-${{ runner.os }} - path: ${{ github.workspace }}/uselib* - ubuntu-vcpkg-opencv3-cuda: runs-on: ubuntu-20.04 @@ -219,23 +202,6 @@ jobs: LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: cfg - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: data - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: ${{ github.workspace }}/*dark* - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: ${{ github.workspace }}/uselib* - ubuntu-cuda: runs-on: ubuntu-20.04 @@ -267,23 +233,6 @@ jobs: LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" run: ./build.ps1 -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET - - uses: actions/upload-artifact@v2 - with: - name: darknet-cuda-${{ runner.os }} - path: cfg - - uses: actions/upload-artifact@v2 - with: - name: darknet-cuda-${{ runner.os }} - path: data - - uses: actions/upload-artifact@v2 - with: - name: darknet-cuda-${{ runner.os }} - path: ${{ github.workspace }}/*dark* - - uses: actions/upload-artifact@v2 - 
with: - name: darknet-cuda-${{ runner.os }} - path: ${{ github.workspace }}/uselib* - ubuntu-no-ocv-cpp: runs-on: ubuntu-20.04 @@ -339,23 +288,6 @@ jobs: shell: pwsh run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -DisableInteractive -DoNotUpdateDARKNET - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: cfg - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: data - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: ${{ github.workspace }}/*dark* - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: ${{ github.workspace }}/uselib* - osx: runs-on: macos-latest @@ -371,23 +303,6 @@ jobs: shell: pwsh run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: cfg - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: data - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: ${{ github.workspace }}/*dark* - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: ${{ github.workspace }}/uselib* - osx-no-ocv-no-omp-cpp: runs-on: macos-latest @@ -421,27 +336,6 @@ jobs: shell: pwsh run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: cfg - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: data - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: ${{ github.workspace }}/*dark* - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: ${{ runner.workspace }}/buildDirectory/Release/*.dll - - uses: actions/upload-artifact@v2 - with: - name: darknet-vcpkg-${{ runner.os }} - path: ${{ github.workspace }}/uselib* - win-intlibs: runs-on: windows-latest @@ -454,27 +348,6 @@ jobs: shell: pwsh run: ./build.ps1 -DisableInteractive -DoNotUpdateDARKNET - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: cfg - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: data - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: ${{ github.workspace }}/*dark* - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: ${{ github.workspace }}/3rdparty/pthreads/bin/*.dll - - uses: actions/upload-artifact@v2 - with: - name: darknet-${{ runner.os }} - path: ${{ github.workspace }}/uselib* - win-setup-ps1: runs-on: windows-latest diff --git a/.github/workflows/rebase.yml b/.github/workflows/rebase.yml new file mode 100644 index 00000000000..608904374ef --- /dev/null +++ b/.github/workflows/rebase.yml @@ -0,0 +1,19 @@ +name: Automatic Rebase +on: + issue_comment: + types: [created] +jobs: + rebase: + name: Rebase + if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') && github.event.comment.author_association == 'MEMBER' + runs-on: ubuntu-latest + steps: + - name: Checkout the latest code + uses: actions/checkout@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 # otherwise, you will fail to push refs to dest repo + - name: Automatic Rebase + uses: cirrus-actions/rebase@1.4 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/build.ps1 b/build.ps1 index 
d230622e9a5..b5115cabc15 100755 --- a/build.ps1 +++ b/build.ps1 @@ -603,7 +603,7 @@ if (-Not $EnableOPENCV_CUDA) { } if ($EnableCSharpWrapper) { - $additional_build_setup = $additional_build_setup + " -DENABLE_CSHARP_WRAPPER:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_CSHARP_WRAPPER:BOOL=ON" } $build_folder = "./build_release" diff --git a/src/csharp/CMakeLists.txt b/src/csharp/CMakeLists.txt index 971725b227d..1b591a16dab 100644 --- a/src/csharp/CMakeLists.txt +++ b/src/csharp/CMakeLists.txt @@ -1,5 +1,5 @@ -project(YoloWrapper LANGUAGES CSharp) +project(YoloCSharpWrapper LANGUAGES CSharp) include(CSharpUtilities) add_library(${PROJECT_NAME} diff --git a/src/csharp/YoloWrapper.cs b/src/csharp/YoloCSharpWrapper.cs similarity index 97% rename from src/csharp/YoloWrapper.cs rename to src/csharp/YoloCSharpWrapper.cs index 52c12adb80f..35d23a9c2e1 100644 --- a/src/csharp/YoloWrapper.cs +++ b/src/csharp/YoloCSharpWrapper.cs @@ -5,7 +5,7 @@ namespace Darknet { public class YoloWrapper : IDisposable { - private const string YoloLibraryName = "yolo_cpp_dll.dll"; + private const string YoloLibraryName = "darknet.dll"; private const int MaxObjects = 1000; [DllImport(YoloLibraryName, EntryPoint = "init")] From 3fafd211b908fd84a66f3723c77119351ed656a5 Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Mon, 21 Jun 2021 11:07:31 +0200 Subject: [PATCH 28/46] [autorebase] bump to v1.5 (#7819) --- .github/workflows/rebase.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rebase.yml b/.github/workflows/rebase.yml index 608904374ef..251a259ffcf 100644 --- a/.github/workflows/rebase.yml +++ b/.github/workflows/rebase.yml @@ -5,7 +5,7 @@ on: jobs: rebase: name: Rebase - if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') && github.event.comment.author_association == 'MEMBER' + if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'CONTRIBUTOR') runs-on: ubuntu-latest steps: - name: Checkout the latest code @@ -14,6 +14,6 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} fetch-depth: 0 # otherwise, you will fail to push refs to dest repo - name: Automatic Rebase - uses: cirrus-actions/rebase@1.4 + uses: cirrus-actions/rebase@1.5 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 070ed01e26e5568ad5183321881d3b26380f5345 Mon Sep 17 00:00:00 2001 From: Alexey Date: Mon, 21 Jun 2021 23:47:53 +0300 Subject: [PATCH 29/46] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2d1bbc52e6c..42007e975d6 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ About Darknet framework: http://pjreddie.com/darknet/ [![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg) - [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) -- [Requirements (and how to install dependencies)](#requirements) +- [Requirements (and how to install dependencies)](#requirements-for-windows-linux-and-macos) - [Pre-trained models](#pre-trained-models) - [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) - [Explanations in 
issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) From 46dea82bced8e7e2aa571a57cd819e97f19356da Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 22 Jun 2021 00:31:38 +0300 Subject: [PATCH 30/46] Added yolov4-p5.cfg and yolov4-p6.cfg --- build/darknet/x64/cfg/yolov4-p5.cfg | 1837 +++++++++++++++++++++ build/darknet/x64/cfg/yolov4-p6.cfg | 2293 +++++++++++++++++++++++++++ cfg/yolov4-p5.cfg | 1837 +++++++++++++++++++++ cfg/yolov4-p6.cfg | 2293 +++++++++++++++++++++++++++ 4 files changed, 8260 insertions(+) create mode 100644 build/darknet/x64/cfg/yolov4-p5.cfg create mode 100644 build/darknet/x64/cfg/yolov4-p6.cfg create mode 100644 cfg/yolov4-p5.cfg create mode 100644 cfg/yolov4-p6.cfg diff --git a/build/darknet/x64/cfg/yolov4-p5.cfg b/build/darknet/x64/cfg/yolov4-p5.cfg new file mode 100644 index 00000000000..3a7848177fd --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-p5.cfg @@ -0,0 +1,1837 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=896 +height=896 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] 
+layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck 
============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 173 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 189 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 205 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 
+size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 189 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 218 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 173 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 231 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 205 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 218 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 
+obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 231 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/build/darknet/x64/cfg/yolov4-p6.cfg b/build/darknet/x64/cfg/yolov4-p6.cfg new file mode 100644 index 00000000000..6e241e12e01 --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-p6.cfg @@ -0,0 +1,2293 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# 
Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 
+stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) 
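+# (Added note on the index comments above, derived from this file's own
+# structure: each CSP backbone stage contributes 7 fixed layers plus 3 layers
+# per residual block, so 158 = 130 + 7 + 3*7 for the k=7 blocks of this stage.)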
+[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 186 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 201 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 
158 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 217 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 233 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 249 (previous+6+4+2k) 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 233 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 262 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 217 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 275 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 201 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 288 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 249 + +[convolutional] +batch_normalize=1 +size=3 
+stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 262 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 275 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-6 + +[route] +layers = 288 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 12,13,14,15 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/cfg/yolov4-p5.cfg b/cfg/yolov4-p5.cfg new file mode 100644 index 00000000000..3a7848177fd --- /dev/null +++ b/cfg/yolov4-p5.cfg @@ -0,0 +1,1837 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=896 +height=896 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 
+stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + 
+[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 173 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 189 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 205 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 189 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 218 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 173 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 231 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 205 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 218 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 231 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/cfg/yolov4-p6.cfg b/cfg/yolov4-p6.cfg new file mode 100644 index 00000000000..6e241e12e01 --- /dev/null +++ b/cfg/yolov4-p6.cfg @@ -0,0 +1,2293 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + 
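The layer-index comments used throughout the backbone below, such as "# 78 (previous+7+3k)" and "# Merge [-1, -(3k+4)]", are manual bookkeeping: darknet [route] and [shortcut] sections address other layers by absolute index or by negative offset, so each CSP stage has to record where it ends. A minimal sketch of that arithmetic, written in the same PowerShell as build.ps1 (the helper name is hypothetical, not part of the repository):

function Get-CspStageEnd {
  param (
    [Int32]$Previous,        # absolute index of the previous stage's last layer
    [Int32]$ResidualBlocks   # k: number of residual blocks in the stage
  )
  # downsample conv + 2 split convs + split route + k * (conv + conv + shortcut)
  # + transition conv + merge route + final transition conv = 7 + 3k layers
  return $Previous + 7 + 3 * $ResidualBlocks
}

$idx = 0                              # layer 0 is the stem convolution
foreach ($k in 1, 3, 15, 15, 7, 7) {  # k for stages P1..P6 of this file
  $idx = Get-CspStageEnd -Previous $idx -ResidualBlocks $k
  Write-Host $idx                     # prints 10, 26, 78, 130, 158, 186
}

The same k also fixes the merge offset: layers = -1,-(3k+4) gives -7, -13, -49 and -25 for k = 1, 3, 15 and 7, matching the [route] lines that close each stage below.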
+# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 
+filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 
+activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 186 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 201 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 158 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 217 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 233 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 249 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 233 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 262 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 217 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 275 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 201 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 288 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 249 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 262 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 275 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + 
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=340
+activation=logistic
+#activation=linear
+# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet
+
+[yolo]
+mask = 8,9,10,11
+anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024
+classes=80
+num=16
+jitter=.1
+scale_x_y = 2.0
+objectness_smooth=1
+ignore_thresh = .7
+truth_thresh = 1
+#random=1
+resize=1.5
+iou_thresh=0.2
+iou_normalizer=0.05
+cls_normalizer=0.5
+obj_normalizer=0.4
+iou_loss=ciou
+nms_kind=diounms
+beta_nms=0.6
+new_coords=1
+max_delta=2
+
+
+# YOLO-6
+
+[route]
+layers = 288
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=mish
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=340
+activation=logistic
+#activation=linear
+# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet
+
+[yolo]
+mask = 12,13,14,15
+anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024
+classes=80
+num=16
+jitter=.1
+scale_x_y = 2.0
+objectness_smooth=1
+ignore_thresh = .7
+truth_thresh = 1
+#random=1
+resize=1.5
+iou_thresh=0.2
+iou_normalizer=0.05
+cls_normalizer=0.5
+obj_normalizer=0.4
+iou_loss=ciou
+nms_kind=diounms
+beta_nms=0.6
+new_coords=1
+max_delta=2
+
+# ============ End of Head ============ #
\ No newline at end of file

From 8bbdb6b77c400106cf9232d226e2cda2bf5f54b1 Mon Sep 17 00:00:00 2001
From: Stefano Sinigardi
Date: Thu, 24 Jun 2021 10:23:45 +0200
Subject: [PATCH 31/46] [CI] test vcpkg nightly builds (#7826)

---
 .github/workflows/ccpp.yml |  22 +++++
 build.ps1                  | 166 +++++++++++++++++++++++++++++--------
 2 files changed, 152 insertions(+), 36 deletions(-)

diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index 92fb50375b3..c8d65b796b4 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -465,6 +465,28 @@ jobs:
         path: ${{ github.workspace }}/uselib*
 
+  win-vcpkg-port:
+    runs-on: windows-latest
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: 'Setup vcpkg and NuGet artifacts backend'
+      shell: bash
+      run: >
+        git clone https://github.com/microsoft/vcpkg ;
+        ./vcpkg/bootstrap-vcpkg.sh ;
+        $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add
+        -Name "vcpkgbinarycache"
+        -Source http://93.49.111.10:5555/v3/index.json ;
+        $(./vcpkg/vcpkg fetch nuget | tail -n 1)
+        setapikey ${{ secrets.BAGET_API_KEY }}
+        -Source http://93.49.111.10:5555/v3/index.json
+
+    - name: 'Build'
+      shell: pwsh
+      run: ./build.ps1 -UseVCPKG -InstallDARKNETthroughVCPKG -ForceVCPKGDarknetHEAD -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET
+
+
   win-intlibs:
     runs-on: windows-latest
     steps:
     - uses: actions/checkout@v2
diff --git a/build.ps1 b/build.ps1
index b5115cabc15..92342afd8b5 100755
--- a/build.ps1
+++ b/build.ps1
@@ -8,6 +8,9 @@ param (
   [switch]$EnableOPENCV = $false,
   [switch]$EnableOPENCV_CUDA = $false,
   [switch]$UseVCPKG = $false,
+  [switch]$InstallDARKNETthroughVCPKG = $false,
+  [switch]$InstallDARKNETdependenciesThroughVCPKGManifest = $false,
+  [switch]$ForceVCPKGDarknetHEAD = $false,
   [switch]$DoNotUpdateVCPKG = $false,
   [switch]$DoNotUpdateDARKNET = $false,
   [switch]$DoNotDeleteBuildFolder = $false,
@@ -18,13 +21,14 @@ param (
   [switch]$ForceVCPKGCacheRemoval = $false,
   [switch]$ForceSetupVS = $false,
   [switch]$EnableCSharpWrapper = $false,
+  [switch]$DownloadWeights = $false,
   [Int32]$ForceGCCVersion = 0,
   [Int32]$ForceOpenCVVersion = 0,
   [Int32]$NumberOfBuildWorkers = 8,
   [string]$AdditionalBuildSetup = ""  # "-DCMAKE_CUDA_ARCHITECTURES=30"
"-DCMAKE_CUDA_ARCHITECTURES=30" ) -$build_ps1_version = "0.9.5" +$build_ps1_version = "0.9.6" $ErrorActionPreference = "SilentlyContinue" Stop-Transcript | out-null @@ -151,10 +155,28 @@ if ($PSVersionTable.PSVersion.Major -lt 5) { if ($IsLinux -or $IsMacOS) { $bootstrap_ext = ".sh" + $exe_ext = "" } elseif ($IsWindows -or $IsWindowsPowerShell) { $bootstrap_ext = ".bat" + $exe_ext = ".exe" } + +if ($InstallDARKNETdependenciesThroughVCPKGManifest -and -not $InstallDARKNETthroughVCPKG) { + Write-Host "You requested darknet dependencies to be installed by vcpkg in manifest mode but you didn't enable installation through vcpkg, doing that for you" + $InstallDARKNETthroughVCPKG = $true +} + +if ($InstallDARKNETthroughVCPKG -and -not $UseVCPKG) { + Write-Host "You requested darknet to be installed by vcpkg but you didn't enable vcpkg, doing that for you" + $UseVCPKG = $true +} + +if ($InstallDARKNETthroughVCPKG -and -not $EnableOPENCV) { + Write-Host "You requested darknet to be installed by vcpkg but you didn't enable OpenCV, doing that for you" + $EnableOPENCV = $true +} + if ($UseVCPKG) { Write-Host "vcpkg bootstrap script: bootstrap-vcpkg${bootstrap_ext}" } @@ -606,42 +628,114 @@ if ($EnableCSharpWrapper) { $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_CSHARP_WRAPPER:BOOL=ON" } -$build_folder = "./build_release" -if (-Not $DoNotDeleteBuildFolder) { - Write-Host "Removing folder $build_folder" -ForegroundColor Yellow - Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $build_folder -} - -New-Item -Path $build_folder -ItemType directory -Force | Out-Null -Set-Location $build_folder -$cmake_args = "-G `"$generator`" ${AdditionalBuildSetup} -S .." -Write-Host "Configuring CMake project" -ForegroundColor Green -Write-Host "CMake args: $cmake_args" -$proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList $cmake_args -$handle = $proc.Handle -$proc.WaitForExit() -$exitCode = $proc.ExitCode -if (-Not ($exitCode -eq 0)) { - MyThrow("Config failed! Exited with error code $exitCode.") -} -Write-Host "Building CMake project" -ForegroundColor Green -$proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${NumberOfBuildWorkers} --target install" -$handle = $proc.Handle -$proc.WaitForExit() -$exitCode = $proc.ExitCode -if (-Not ($exitCode -eq 0)) { - MyThrow("Config failed! Exited with error code $exitCode.") -} -Remove-Item -Force -ErrorAction SilentlyContinue DarknetConfig.cmake -Remove-Item -Force -ErrorAction SilentlyContinue DarknetConfigVersion.cmake -$dllfiles = Get-ChildItem ./${dllfolder}/*.dll -if ($dllfiles) { - Copy-Item $dllfiles .. -} -Set-Location .. -Copy-Item cmake/Modules/*.cmake share/darknet/ +if ($InstallDARKNETthroughVCPKG) { + if ($ForceVCPKGDarknetHEAD) { + $headMode = " --head " + } + $features = "opencv-base" + $feature_manifest_opencv = "--x-feature=opencv-base" + if ($EnableCUDA) { + $features = $features + ",cuda" + $feature_manifest_cuda = "--x-feature=cuda" + } + if ($EnableCUDNN) { + $features = $features + ",cudnn" + $feature_manifest_cudnn = "--x-feature=cudnn" + } + if (-not (Test-Path "${env:VCPKG_ROOT}/vcpkg${exe_ext}")) { + $proc = Start-Process -NoNewWindow -PassThru -FilePath ${env:VCPKG_ROOT}/bootstrap-vcpkg${bootstrap_ext} -ArgumentList "-disableMetrics" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Bootstrapping vcpkg failed! 
Exited with error code $exitCode.") + } + } + if ($InstallDARKNETdependenciesThroughVCPKGManifest) { + Write-Host "Running vcpkg in manifest mode to install darknet dependencies" + Write-Host "vcpkg install --x-no-default-features $feature_manifest_opencv $feature_manifest_cuda $feature_manifest_cudnn $headMode" + $proc = Start-Process -NoNewWindow -PassThru -FilePath "${env:VCPKG_ROOT}/vcpkg${exe_ext}" -ArgumentList " install --x-no-default-features $feature_manifest_opencv $feature_manifest_cuda $feature_manifest_cudnn $headMode " + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Installing darknet through vcpkg failed! Exited with error code $exitCode.") + } + } + else { + Write-Host "Running vcpkg to install darknet" + Write-Host "vcpkg install darknet[${features}] $headMode --recurse" + Push-Location ${env:VCPKG_ROOT} + if ($ForceVCPKGDarknetHEAD) { + $proc = Start-Process -NoNewWindow -PassThru -FilePath "${env:VCPKG_ROOT}/vcpkg${exe_ext}" -ArgumentList " --feature-flags=-manifests remove darknet --recurse " + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Removing darknet through vcpkg failed! Exited with error code $exitCode.") + } + } + $proc = Start-Process -NoNewWindow -PassThru -FilePath "${env:VCPKG_ROOT}/vcpkg${exe_ext}" -ArgumentList " --feature-flags=-manifests upgrade --no-dry-run " + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Upgrading vcpkg installed ports failed! Exited with error code $exitCode.") + } + $proc = Start-Process -NoNewWindow -PassThru -FilePath "${env:VCPKG_ROOT}/vcpkg${exe_ext}" -ArgumentList " --feature-flags=-manifests install darknet[${features}] $headMode --recurse " # "-manifest" disables the manifest feature, so that if vcpkg is a subfolder of darknet, the vcpkg.json inside darknet folder does not trigger errors due to automatic manifest mode + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Installing darknet dependencies through vcpkg failed! Exited with error code $exitCode.") + } + Pop-Location + } +} +else { + $build_folder = "./build_release" + if (-Not $DoNotDeleteBuildFolder) { + Write-Host "Removing folder $build_folder" -ForegroundColor Yellow + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $build_folder + } + New-Item -Path $build_folder -ItemType directory -Force | Out-Null + Set-Location $build_folder + $cmake_args = "-G `"$generator`" ${AdditionalBuildSetup} -S .." + Write-Host "Configuring CMake project" -ForegroundColor Green + Write-Host "CMake args: $cmake_args" + $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList $cmake_args + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Config failed! Exited with error code $exitCode.") + } + Write-Host "Building CMake project" -ForegroundColor Green + $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${NumberOfBuildWorkers} --target install" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Config failed! 
Exited with error code $exitCode.") + } + Remove-Item -Force -ErrorAction SilentlyContinue DarknetConfig.cmake + Remove-Item -Force -ErrorAction SilentlyContinue DarknetConfigVersion.cmake + $dllfiles = Get-ChildItem ./${dllfolder}/*.dll + if ($dllfiles) { + Copy-Item $dllfiles .. + } + Set-Location .. + Copy-Item cmake/Modules/*.cmake share/darknet/ + Pop-Location +} + Write-Host "Build complete!" -ForegroundColor Green -Pop-Location + +if ($DownloadWeights) { + Write-Host "Downloading weights..." -ForegroundColor Yellow + & $PSScriptRoot/scripts/download_weights.ps1 + Write-Host "Weights downloaded" -ForegroundColor Green +} if ($vcpkg_root_set_by_this_script) { $env:VCPKG_ROOT = $null From 28952006b010146344319fd14f9e41b5ee5d828b Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sun, 27 Jun 2021 04:46:03 +0300 Subject: [PATCH 32/46] Fixed cfg: yolov4-p5.cfg, yolov4-p6.cfg. Added new Weights-files, for: yolov4-csp, yolov4-p5, yolov4-p6, yolov4x-mish --- build/darknet/x64/cfg/yolov4-p5.cfg | 6 +++--- build/darknet/x64/cfg/yolov4-p6.cfg | 25 +++++++++++++++---------- build/darknet/x64/partial.cmd | 14 +++++++++++++- cfg/yolov4-p5.cfg | 6 +++--- cfg/yolov4-p6.cfg | 25 +++++++++++++++---------- 5 files changed, 49 insertions(+), 27 deletions(-) diff --git a/build/darknet/x64/cfg/yolov4-p5.cfg b/build/darknet/x64/cfg/yolov4-p5.cfg index 3a7848177fd..14bce30ebd2 100644 --- a/build/darknet/x64/cfg/yolov4-p5.cfg +++ b/build/darknet/x64/cfg/yolov4-p5.cfg @@ -1737,7 +1737,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 @@ -1782,7 +1782,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 @@ -1827,7 +1827,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 diff --git a/build/darknet/x64/cfg/yolov4-p6.cfg b/build/darknet/x64/cfg/yolov4-p6.cfg index 6e241e12e01..8defa150b6f 100644 --- a/build/darknet/x64/cfg/yolov4-p6.cfg +++ b/build/darknet/x64/cfg/yolov4-p6.cfg @@ -2143,17 +2143,22 @@ mask = 0,1,2,3 anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 classes=80 num=16 -jitter=.3 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 iou_loss=ciou -nms_kind=greedynms +nms_kind=diounms beta_nms=0.6 +new_coords=1 +max_delta=2 # YOLO-4 @@ -2193,7 +2198,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 @@ -2238,7 +2243,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 @@ -2283,7 +2288,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd index a9b06ca86b7..6080cf68b89 100644 --- a/build/darknet/x64/partial.cmd +++ b/build/darknet/x64/partial.cmd @@ -2,10 +2,22 @@ rem Download weights for - DenseNet201, ResNet50 and ResNet152 by 
this link: htt rem Download Yolo/Tiny-yolo: https://pjreddie.com/darknet/yolo/ rem Download Yolo9000: http://pjreddie.com/media/files/yolo9000.weights +darknet.exe partial cfg/yolov4-csp.cfg yolov4-csp.weights yolov4-csp.conv.142 142 + +darknet.exe partial cfg/yolov4x-mish.cfg yolov4x-mish.weights yolov4x-mish.conv.166 166 + + + + +rem darknet.exe partial cfg/yolov4-p5.cfg yolov4-p5.weights yolov4-p5.conv.232 232 + +rem darknet.exe partial cfg/yolov4-p6.cfg yolov4-p6.weights yolov4-p6.conv.289 289 + + rem darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.13 13 -darknet.exe partial cfg/yolov4-tiny.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29 +rem darknet.exe partial cfg/yolov4-tiny.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29 rem darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.137 137 diff --git a/cfg/yolov4-p5.cfg b/cfg/yolov4-p5.cfg index 3a7848177fd..14bce30ebd2 100644 --- a/cfg/yolov4-p5.cfg +++ b/cfg/yolov4-p5.cfg @@ -1737,7 +1737,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 @@ -1782,7 +1782,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 @@ -1827,7 +1827,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 diff --git a/cfg/yolov4-p6.cfg b/cfg/yolov4-p6.cfg index 6e241e12e01..8defa150b6f 100644 --- a/cfg/yolov4-p6.cfg +++ b/cfg/yolov4-p6.cfg @@ -2143,17 +2143,22 @@ mask = 0,1,2,3 anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 classes=80 num=16 -jitter=.3 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 ignore_thresh = .7 truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 iou_loss=ciou -nms_kind=greedynms +nms_kind=diounms beta_nms=0.6 +new_coords=1 +max_delta=2 # YOLO-4 @@ -2193,7 +2198,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 @@ -2238,7 +2243,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 @@ -2283,7 +2288,7 @@ resize=1.5 iou_thresh=0.2 iou_normalizer=0.05 cls_normalizer=0.5 -obj_normalizer=0.4 +obj_normalizer=1.0 iou_loss=ciou nms_kind=diounms beta_nms=0.6 From 24bb6a8ebf5ec32bff4f30ec0801d9a9a9fca3f6 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 29 Jun 2021 21:32:38 +0300 Subject: [PATCH 33/46] Fixed [] stopbackward=1, for frozen layers: don't calc batch-norm statistic and don't allocate training-buffers. 
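
In outline: the parser now scans the cfg for the last section carrying
stopbackward=1 and freezes every layer in front of it (l.train = 0), so
frozen layers run batch-norm in inference mode, skip their update step,
and no longer allocate delta/training buffers. A minimal, self-contained
sketch of just the freezing rule (simplified struct and field names for
illustration, not darknet's actual layer type; the real logic lives in
parser.c, network.c and the CUDA kernels):

    /* freeze_sketch.c: freeze everything before the last stopbackward=1 */
    #include <stdio.h>

    typedef struct { int stopbackward; int train; } layer_t;

    static void freeze_layers(layer_t *l, int n) {
        int last_stop_backward = -1;              /* -1 means nothing frozen */
        for (int i = 0; i < n; ++i)
            if (l[i].stopbackward) last_stop_backward = i;  /* keep the last */
        for (int i = 0; i < n; ++i)
            l[i].train = (i < last_stop_backward) ? 0 : 1;
    }

    int main(void) {
        layer_t net[6] = { {0,1}, {0,1}, {1,1}, {0,1}, {0,1}, {0,1} };
        freeze_layers(net, 6);
        for (int i = 0; i < 6; ++i)               /* prints 0 0 1 1 1 1 */
            printf("layer %d: train=%d\n", i, net[i].train);
        return 0;
    }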
--- src/convolutional_kernels.cu | 5 +++-- src/network.c | 3 ++- src/network_kernels.cu | 4 +++- src/parser.c | 43 ++++++++++++++++++++++++++++++++++-- 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index ddd140c3088..f44016c52ca 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -165,6 +165,7 @@ half *cuda_make_f16_from_f32_array(float *src, size_t n) void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) { + state.train = l.train; if (l.stream >= 0) { switch_stream(l.stream); } @@ -1219,8 +1220,8 @@ void pull_convolutional_layer(convolutional_layer l) { cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); cuda_pull_array_async(l.biases_gpu, l.biases, l.n); - cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); - cuda_pull_array_async(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.weight_updates_gpu) cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); + if (l.bias_updates_gpu) cuda_pull_array_async(l.bias_updates_gpu, l.bias_updates, l.n); if (l.batch_normalize){ cuda_pull_array_async(l.scales_gpu, l.scales, l.n); cuda_pull_array_async(l.rolling_mean_gpu, l.rolling_mean, l.n); diff --git a/src/network.c b/src/network.c index 600be1b2d7d..39d9d1ca8f0 100644 --- a/src/network.c +++ b/src/network.c @@ -273,7 +273,7 @@ void forward_network(network net, network_state state) for(i = 0; i < net.n; ++i){ state.index = i; layer l = net.layers[i]; - if(l.delta && state.train){ + if(l.delta && state.train && l.train){ scal_cpu(l.outputs * l.batch, 0, l.delta, 1); } //double time = get_time_point(); @@ -297,6 +297,7 @@ void update_network(network net) float rate = get_current_rate(net); for(i = 0; i < net.n; ++i){ layer l = net.layers[i]; + if (l.train == 0) continue; if(l.update){ l.update(l, update_batch, rate, net.momentum, net.decay); } diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 9f4c640b3e9..6f3a651968d 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -76,7 +76,7 @@ void forward_network_gpu(network net, network_state state) for(i = 0; i < net.n; ++i){ state.index = i; layer l = net.layers[i]; - if(l.delta_gpu && state.train){ + if(l.delta_gpu && state.train && l.train){ fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1); } @@ -271,6 +271,8 @@ void update_network_gpu(network net) float rate = get_current_rate(net); for(i = 0; i < net.n; ++i){ layer l = net.layers[i]; + if (l.train == 0) continue; + l.t = get_current_batch(net); if (iteration_num > (net.max_batches * 1 / 2)) l.deform = 0; if (l.burnin_update && (l.burnin_update*net.burn_in > iteration_num)) continue; diff --git a/src/parser.c b/src/parser.c index a2d133382c4..d1f2f744510 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1366,8 +1366,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) net.gpu_index = gpu_index; size_params params; - if (batch > 0) params.train = 0; // allocates memory for Detection only - else params.train = 1; // allocates memory for Detection & Training + if (batch > 0) params.train = 0; // allocates memory for Inference only + else params.train = 1; // allocates memory for Inference & Training section *s = (section *)n->val; list *options = s->options; @@ -1395,6 +1395,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) params.net = net; printf("mini_batch = %d, batch = %d, time_steps = %d, train = %d \n", net.batch, net.batch * 
net.subdivisions, net.time_steps, params.train); + int last_stop_backward = -1; int avg_outputs = 0; int avg_counter = 0; float bflops = 0; @@ -1408,8 +1409,28 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) n = n->next; int count = 0; free_section(s); + + // find l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + node *n_tmp = n; + int count_tmp = 0; + while (n_tmp) { + s = (section *)n_tmp->val; + options = s->options; + int stopbackward = option_find_int_quiet(options, "stopbackward", 0); + if (stopbackward == 1) { + last_stop_backward = count_tmp; + printf("last_stop_backward = %d \n", last_stop_backward); + } + n_tmp = n_tmp->next; + ++count_tmp; + } + fprintf(stderr, " layer filters size/strd(dil) input output\n"); while(n){ + + if (count < last_stop_backward) params.train = 0; + else params.train = 1; + params.index = count; fprintf(stderr, "%4d ", count); s = (section *)n->val; @@ -1662,6 +1683,24 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) avg_counter++; } } + + if (last_stop_backward > -1) { + int k; + for (k = 0; k < last_stop_backward; ++k) { + layer l = net.layers[k]; + if (l.keep_delta_gpu) { + if (!l.delta) l.delta = (float*)xcalloc(l.outputs, sizeof(float)); +#ifdef GPU + if (!l.delta_gpu) l.delta_gpu = (float *)cuda_make_array(NULL, l.outputs); +#endif + } + + l.onlyforward = 1; + l.train = 0; + net.layers[k] = l; + } + } + free_list(sections); #ifdef GPU From 85c6278ef1f8a0bed424567c5ac86da44a65474a Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 29 Jun 2021 22:09:26 +0300 Subject: [PATCH 34/46] Added cfg/yolov4-p5-frozen.cfg for training with pre-trained COCO-weights file yolov4-p5.weights --- build/darknet/x64/cfg/yolov4-p5-frozen.cfg | 1838 ++++++++++++++++++++ cfg/yolov4-p5-frozen.cfg | 1838 ++++++++++++++++++++ src/parser.c | 8 +- 3 files changed, 3682 insertions(+), 2 deletions(-) create mode 100644 build/darknet/x64/cfg/yolov4-p5-frozen.cfg create mode 100644 cfg/yolov4-p5-frozen.cfg diff --git a/build/darknet/x64/cfg/yolov4-p5-frozen.cfg b/build/darknet/x64/cfg/yolov4-p5-frozen.cfg new file mode 100644 index 00000000000..38bebe32a21 --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-p5-frozen.cfg @@ -0,0 +1,1838 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=896 +height=896 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# 
Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + 
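+# ([shortcut] below closes this residual unit: from=-3 adds the unit's
+# input, three layers back, to the output of the 1x1/3x3 conv pair above)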
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 173 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 189 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 205 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 189 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 218 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 173 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 231 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=1 + +# ============ End of Neck ============ # + +# ============ 
Head ============ # + +# YOLO-3 + +[route] +layers = 205 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 218 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 231 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/cfg/yolov4-p5-frozen.cfg b/cfg/yolov4-p5-frozen.cfg new file mode 100644 index 00000000000..38bebe32a21 --- /dev/null +++ b/cfg/yolov4-p5-frozen.cfg @@ -0,0 +1,1838 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=896 +height=896 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + 
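+# (a 1x1 conv on the residual branch; the [route] below then concatenates
+# it with the split branch)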
+[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 
+filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 
+size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 173 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] 
+layers = -1, -8 + +# Transition last + +# 189 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 205 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 189 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 218 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 173 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 231 (previous+3+4+2k) +[convolutional] 
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=mish
+stopbackward=1
+
+# ============ End of Neck ============ #
+
+# ============ Head ============ #
+
+# YOLO-3
+
+[route]
+layers = 205
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=mish
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=340
+activation=logistic
+#activation=linear
+# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet
+
+[yolo]
+mask = 0,1,2,3
+anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800
+classes=80
+num=12
+jitter=.1
+scale_x_y = 2.0
+objectness_smooth=1
+ignore_thresh = .7
+truth_thresh = 1
+#random=1
+resize=1.5
+iou_thresh=0.2
+iou_normalizer=0.05
+cls_normalizer=0.5
+obj_normalizer=1.0
+iou_loss=ciou
+nms_kind=diounms
+beta_nms=0.6
+new_coords=1
+max_delta=2
+
+
+# YOLO-4
+
+[route]
+layers = 218
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=mish
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=340
+activation=logistic
+#activation=linear
+# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet
+
+[yolo]
+mask = 4,5,6,7
+anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800
+classes=80
+num=12
+jitter=.1
+scale_x_y = 2.0
+objectness_smooth=1
+ignore_thresh = .7
+truth_thresh = 1
+#random=1
+resize=1.5
+iou_thresh=0.2
+iou_normalizer=0.05
+cls_normalizer=0.5
+obj_normalizer=1.0
+iou_loss=ciou
+nms_kind=diounms
+beta_nms=0.6
+new_coords=1
+max_delta=2
+
+
+# YOLO-5
+
+[route]
+layers = 231
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=mish
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=340
+activation=logistic
+#activation=linear
+# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet
+
+[yolo]
+mask = 8,9,10,11
+anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800
+classes=80
+num=12
+jitter=.1
+scale_x_y = 2.0
+objectness_smooth=1
+ignore_thresh = .7
+truth_thresh = 1
+#random=1
+resize=1.5
+iou_thresh=0.2
+iou_normalizer=0.05
+cls_normalizer=0.5
+obj_normalizer=1.0
+iou_loss=ciou
+nms_kind=diounms
+beta_nms=0.6
+new_coords=1
+max_delta=2
+
+# ============ End of Head ============ #
\ No newline at end of file
diff --git a/src/parser.c b/src/parser.c
index d1f2f744510..37c8e0776b0 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1499,7 +1499,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
             int k;
             for (k = 0; k < l.n; ++k) {
                 net.layers[l.input_layers[k]].use_bin_output = 0;
-                net.layers[l.input_layers[k]].keep_delta_gpu = 1;
+                if (count >= last_stop_backward)
+                    net.layers[l.input_layers[k]].keep_delta_gpu = 1;
             }
         }else if (lt == UPSAMPLE) {
             l = parse_upsample(options, params, net);
@@ -1507,7 +1508,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
             l = parse_shortcut(options, params, net);
             net.layers[count - 1].use_bin_output = 0;
             net.layers[l.index].use_bin_output = 0;
-            net.layers[l.index].keep_delta_gpu = 1;
+            if (count >= last_stop_backward)
+                net.layers[l.index].keep_delta_gpu = 1;
         }else if (lt == SCALE_CHANNELS) {
             l = parse_scale_channels(options, params, net);
             net.layers[count - 1].use_bin_output = 0;
@@ -1655,6 +1657,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
         option_unused(options);

+        if (l.stopbackward == 1) printf(" ------- previous layers are frozen ------- \n");
+
         net.layers[count] = l;
         if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
         if (l.inputs > max_inputs) max_inputs = l.inputs;

From c2221f07f88c9ff189c77c10a7381c22b59c7d58 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Tue, 29 Jun 2021 23:54:58 +0300
Subject: [PATCH 35/46] Fixed adversarial training

---
 include/darknet.h      |  2 +-
 src/image.c            |  4 ++--
 src/network_kernels.cu | 17 +++++++++++++++--
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index f5170f375cd..6daaaf1ca65 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -1056,7 +1056,7 @@ LIB_API void optimize_picture(network *net, image orig, int max_layer, float sca

 // image.h
 LIB_API void make_image_red(image im);
-LIB_API image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c);
+LIB_API image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c, float alpha);
 LIB_API image resize_image(image im, int w, int h);
 LIB_API void quantize_image(image im);
 LIB_API void copy_image_from_bytes(image im, char *pdata);
diff --git a/src/image.c b/src/image.c
index 07942c71e6e..8ef026d33a2 100644
--- a/src/image.c
+++ b/src/image.c
@@ -1351,7 +1351,7 @@ void make_image_red(image im)
     }
 }

-image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c)
+image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c, float alpha)
 {
     image attention_img;
     attention_img.w = w;
@@ -1379,7 +1379,7 @@ image make_attention_image(int img_size, float *original_delta_cpu, float *origi
     image resized = resize_image(attention_img, w / 4, h / 4);
     attention_img = resize_image(resized, w, h);
     free_image(resized);
-    for (k = 0; k < img_size; ++k) attention_img.data[k] += original_input_cpu[k];
+    for (k = 0; k < img_size; ++k) attention_img.data[k] = attention_img.data[k]*alpha + (1-alpha)*original_input_cpu[k];

     //normalize_image(attention_img);
     //show_image(attention_img, "delta");
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index 6f3a651968d..ac3403cf547 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -76,7 +76,7 @@ void forward_network_gpu(network net, network_state state)
     for(i = 0; i < net.n; ++i){
         state.index = i;
         layer l = net.layers[i];
-        if(l.delta_gpu && state.train && l.train){
+        if(l.delta_gpu && state.train){
             fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
         }

@@ -235,12 +235,25 @@ void backward_network_gpu(network net, network_state state)
             cuda_pull_array(original_input, original_input_cpu, img_size);
             cuda_pull_array(original_delta, original_delta_cpu, img_size);

-            image attention_img = make_attention_image(img_size, original_delta_cpu, original_input_cpu, net.w, net.h, net.c);
+            image attention_img = make_attention_image(img_size, original_delta_cpu, original_input_cpu, net.w, net.h, net.c, 0.7);
             show_image(attention_img, "attention_img");
             resize_window_cv("attention_img", 500, 500);

+            //static int img_counter = 0;
+            //img_counter++;
+            //char buff[256];
+            //sprintf(buff, "attention_img_%d.png", img_counter);
+            //save_image_png(attention_img, buff);
             free_image(attention_img);

+            image attention_mask_img = make_attention_image(img_size, original_delta_cpu, original_delta_cpu, net.w, net.h, net.c, 1.0);
+            show_image(attention_mask_img, "attention_mask_img");
+            resize_window_cv("attention_mask_img", 500, 500);
+
+            //sprintf(buff, "attention_mask_img_%d.png", img_counter);
+            //save_image_png(attention_mask_img, buff);
+            free_image(attention_mask_img);
+
             free(original_input_cpu);
             free(original_delta_cpu);
         }
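The key change in the patch above is that `make_attention_image()` now takes an `alpha` blending weight instead of simply adding the input image to the attention map. A minimal sketch of the blend rule (illustrative only, mirroring the per-element update in `src/image.c` above; the sample pixel values are made up):

```python
# out = attention*alpha + (1 - alpha)*original, applied per float pixel value
def blend(attention, original, alpha):
    return attention * alpha + (1 - alpha) * original

print(blend(0.9, 0.2, 0.7))  # 0.69 -> "attention_img": heat-map overlaid on the input
print(blend(0.9, 0.2, 1.0))  # 0.90 -> "attention_mask_img": pure delta mask, input ignored
```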
"attention_mask_img"); + resize_window_cv("attention_mask_img", 500, 500); + + //sprintf(buff, "attention_mask_img_%d.png", img_counter); + //save_image_png(attention_mask_img, buff); + free_image(attention_mask_img); + free(original_input_cpu); free(original_delta_cpu); } From 97ccee56283ad92118153f0cd1036d1ea07ba115 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 29 Jun 2021 23:55:34 +0300 Subject: [PATCH 36/46] minor fix - added: move_window_cv() function --- src/image_opencv.cpp | 11 +++++++++++ src/image_opencv.h | 1 + 2 files changed, 12 insertions(+) diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index f9615cb00ae..2524f1593ca 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -385,6 +385,17 @@ extern "C" void resize_window_cv(char const* window_name, int width, int height) } // ---------------------------------------- +extern "C" void move_window_cv(char const* window_name, int x, int y) +{ + try { + cv::moveWindow(window_name, x, y); + } + catch (...) { + cerr << "OpenCV exception: create_window_cv \n"; + } +} +// ---------------------------------------- + extern "C" void destroy_all_windows_cv() { try { diff --git a/src/image_opencv.h b/src/image_opencv.h index 3aeb4478390..19d16e1d9b5 100644 --- a/src/image_opencv.h +++ b/src/image_opencv.h @@ -48,6 +48,7 @@ image mat_to_image_cv(mat_cv *mat); // Window void create_window_cv(char const* window_name, int full_screen, int width, int height); void resize_window_cv(char const* window_name, int width, int height); +void move_window_cv(char const* window_name, int x, int y); void destroy_all_windows_cv(); int wait_key_cv(int delay); int wait_until_press_key_cv(); From 90c15f8f04ec46a855dd10d55f2c296bf6b13567 Mon Sep 17 00:00:00 2001 From: Alexey Date: Wed, 30 Jun 2021 06:37:57 +0300 Subject: [PATCH 37/46] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 42007e975d6..edf899bf6d8 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,7 @@ You can get cfg-files by path: `darknet/cfg/` - [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) - [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) - [YOLOv5](https://github.com/ultralytics/yolov5) -- **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ +- **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ and https://github.com/ttanzhiqiang/onnx_tensorrt_project - **nVidia Transfer Learning Toolkit (TLT>=3.0)** Training and Detection https://docs.nvidia.com/metropolis/TLT/tlt-user-guide/text/object_detection/yolo_v4.html - **TensorRT+tkDNN**: https://github.com/ceccocats/tkDNN#fps-results - **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo read [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) and [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). 
From 86ced7151a71c05fab57bc14f77d6a4bb97b9ee6 Mon Sep 17 00:00:00 2001
From: Alexey
Date: Wed, 30 Jun 2021 06:51:24 +0300
Subject: [PATCH 38/46] Update README.md

---
 README.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index edf899bf6d8..6912b30dc89 100644
--- a/README.md
+++ b/README.md
@@ -145,13 +145,19 @@ There are weights-file for different cfg-files (trained for MS COCO dataset):

 FPS on RTX 2070 (R) and Tesla V100 (V):

-- [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - 640x640 - **67.9% mAP@0.5 (49.4% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights)
+- [yolov4-p6.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-p6.cfg) - 1280x1280 - **72.1% mAP@0.5 (54.0% AP@0.5:0.95) - 32(V) FPS** - xxx BFlops (xxx FMA) - 487 MB: [yolov4-p6.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-p6.weights)
+  - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-p6.conv.289
+
+- [yolov4-p5.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-p5.cfg) - 896x896 - **70.0% mAP@0.5 (51.6% AP@0.5:0.95) - 43(V) FPS** - xxx BFlops (xxx FMA) - 271 MB: [yolov4-p5.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-p5.weights)
+  - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-p5.conv.232
+
+- [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - 640x640 - **68.5% mAP@0.5 (50.1% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights)
   - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166

 - [yolov4-csp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg) - 202 MB: [yolov4-csp.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights) paper [Scaled Yolo v4](https://arxiv.org/abs/2011.08036)

 just change `width=` and `height=` parameters in `yolov4-csp.cfg` file and use the same `yolov4-csp.weights` file for all cases:

-  - `width=640 height=640` in cfg: **66.2% mAP@0.5 (47.5% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops
+  - `width=640 height=640` in cfg: **67.4% mAP@0.5 (48.7% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops
   - `width=512 height=512` in cfg: **64.8% mAP@0.5 (46.2% AP@0.5:0.95) - 93(V) FPS** - 77 (39 FMA) BFlops
   - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142
From 9c9232d1c3f0f80e40bf347643a542903d6703ca Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sat, 3 Jul 2021 04:18:25 +0300
Subject: [PATCH 39/46] Bug fix

---
 src/parser.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/parser.c b/src/parser.c
index 37c8e0776b0..6d5590e8f55 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1429,7 +1429,6 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
     while(n){

         if (count < last_stop_backward) params.train = 0;
-        else params.train = 1;

         params.index = count;
         fprintf(stderr, "%4d ", count);
@@ -1693,9 +1692,9 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         for (k = 0; k < last_stop_backward; ++k) {
             layer l = net.layers[k];
             if (l.keep_delta_gpu) {
-                if (!l.delta) l.delta = (float*)xcalloc(l.outputs, sizeof(float));
+                if (!l.delta) l.delta = (float*)xcalloc(l.outputs*l.batch, sizeof(float));
 #ifdef GPU
-                if (!l.delta_gpu) l.delta_gpu = (float *)cuda_make_array(NULL, l.outputs);
+                if (!l.delta_gpu) l.delta_gpu = (float *)cuda_make_array(NULL, l.outputs*l.batch);
 #endif
             }

From b2cb64dffbcf706ac9f1d12d7fe699c40eacc40b Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sat, 3 Jul 2021 04:25:42 +0300
Subject: [PATCH 40/46] bug fix

---
 src/parser.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/parser.c b/src/parser.c
index 6d5590e8f55..8930791787f 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1425,9 +1425,12 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         ++count_tmp;
     }

+    int old_params_train = params.train;
+
     fprintf(stderr, "   layer   filters  size/strd(dil)      input                output\n");
     while(n){

+        params.train = old_params_train;
         if (count < last_stop_backward) params.train = 0;

         params.index = count;

From 335ac662c84421982ca3a54a47e64926b88297d5 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sat, 3 Jul 2021 04:46:55 +0300
Subject: [PATCH 41/46] Improved [net] optimized_memory=1 to reduce GPU memory
 usage.

---
 src/parser.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/parser.c b/src/parser.c
index 8930791787f..db28e737571 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1615,7 +1615,12 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
 #ifdef GPU
         // further GPU-memory optimization: net.optimized_memory == 2
         l.optimized_memory = net.optimized_memory;
-        if (net.optimized_memory >= 2 && params.train && l.type != DROPOUT)
+        if (net.optimized_memory == 1 && params.train && l.type != DROPOUT) {
+            if (l.delta_gpu) {
+                cuda_free(l.delta_gpu);
+                l.delta_gpu = NULL;
+            }
+        } else if (net.optimized_memory >= 2 && params.train && l.type != DROPOUT)
         {
             if (l.output_gpu) {
                 cuda_free(l.output_gpu);
@@ -1732,6 +1737,9 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
                 }
                 l.delta_gpu = net.global_delta_gpu;
             }
+            else {
+                if (!l.delta_gpu) l.delta_gpu = (float *)cuda_make_array(NULL, l.outputs*l.batch);
+            }

             // maximum optimization
             if (net.optimized_memory >= 3 && l.type != DROPOUT) {
From be3985210ed336477b2e1317fd13c6cf1494adb1 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sun, 4 Jul 2021 06:31:45 +0300
Subject: [PATCH 42/46] fix possible bug

---
 src/convolutional_kernels.cu |  3 ++-
 src/parser.c                 | 33 +++++++++++++++++++--------------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index f44016c52ca..1e9cdd9c739 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -165,7 +165,8 @@ half *cuda_make_f16_from_f32_array(float *src, size_t n)

 void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
 {
-    state.train = l.train;
+    if (l.train == 0) state.train = 0;
+
     if (l.stream >= 0) {
         switch_stream(l.stream);
     }
diff --git a/src/parser.c b/src/parser.c
index db28e737571..1a345b5cc1b 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1413,16 +1413,18 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
     // find l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
     node *n_tmp = n;
     int count_tmp = 0;
-    while (n_tmp) {
-        s = (section *)n_tmp->val;
-        options = s->options;
-        int stopbackward = option_find_int_quiet(options, "stopbackward", 0);
-        if (stopbackward == 1) {
-            last_stop_backward = count_tmp;
-            printf("last_stop_backward = %d \n", last_stop_backward);
+    if (params.train == 1) {
+        while (n_tmp) {
+            s = (section *)n_tmp->val;
+            options = s->options;
+            int stopbackward = option_find_int_quiet(options, "stopbackward", 0);
+            if (stopbackward == 1) {
+                last_stop_backward = count_tmp;
+                printf("last_stop_backward = %d \n", last_stop_backward);
+            }
+            n_tmp = n_tmp->next;
+            ++count_tmp;
         }
-        n_tmp = n_tmp->next;
-        ++count_tmp;
     }
@@ -1700,15 +1702,18 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         for (k = 0; k < last_stop_backward; ++k) {
             layer l = net.layers[k];
             if (l.keep_delta_gpu) {
-                if (!l.delta) l.delta = (float*)xcalloc(l.outputs*l.batch, sizeof(float));
+                if (!l.delta) {
+                    net.layers[k].delta = (float*)xcalloc(l.outputs*l.batch, sizeof(float));
+                }
 #ifdef GPU
-                if (!l.delta_gpu) l.delta_gpu = (float *)cuda_make_array(NULL, l.outputs*l.batch);
+                if (!l.delta_gpu) {
+                    net.layers[k].delta_gpu = (float *)cuda_make_array(NULL, l.outputs*l.batch);
+                }
 #endif
             }
-            l.onlyforward = 1;
-            l.train = 0;
-            net.layers[k] = l;
+            net.layers[k].onlyforward = 1;
+            net.layers[k].train = 0;
         }
     }

From 005513a9db14878579adfbb61083962c99bb0a89 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sun, 4 Jul 2021 06:38:49 +0300
Subject: [PATCH 43/46] minor fix

---
 src/network.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/network.c b/src/network.c
index 39d9d1ca8f0..c41932479fd 100644
--- a/src/network.c
+++ b/src/network.c
@@ -1459,6 +1459,7 @@ void copy_weights_net(network net_train, network *net_map)
         }
         net_map->layers[k].batch = 1;
         net_map->layers[k].steps = 1;
+        net_map->layers[k].train = 0;
     }
 }

From 842a6025846640c110654b3bdfdb64fe3d10d5c9 Mon Sep 17 00:00:00 2001
From: Alexey
Date: Thu, 8 Jul 2021 01:13:29 +0300
Subject: [PATCH 44/46] Update README.md

---
 README.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/README.md b/README.md
index 6912b30dc89..3b5a4e1bff4 100644
--- a/README.md
+++ b/README.md
@@ -768,3 +768,27 @@ public:
 #endif
 };
 ```
+
+## Citation
+
+```
+@misc{bochkovskiy2020yolov4,
+      title={YOLOv4: Optimal Speed and Accuracy of Object Detection},
+      author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao},
+      year={2020},
+      eprint={2004.10934},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}
+```
+
+```
+@InProceedings{Wang_2021_CVPR,
+    author    = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark},
+    title     = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network},
+    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+    month     = {June},
+    year      = {2021},
+    pages     = {13029-13038}
+}
+```
From 08088dccbbae7bb68c41d5ae3355380ce6654663 Mon Sep 17 00:00:00 2001
From: Alexey
Date: Thu, 8 Jul 2021 01:14:28 +0300
Subject: [PATCH 45/46] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 3b5a4e1bff4..bb545cf097a 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,7 @@ About Darknet framework: http://pjreddie.com/darknet/
 - [How to improve object detection](#how-to-improve-object-detection)
 - [How to mark bounded boxes of objects and create annotation files](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files)
 - [How to use Yolo as DLL and SO libraries](#how-to-use-yolo-as-dll-and-so-libraries)
+- [Citation](#citation)

 ![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png)

From 9c26b291fabad663656e1e52d125134564f539d1 Mon Sep 17 00:00:00 2001
From: Stefano Sinigardi
Date: Fri, 9 Jul 2021 13:50:38 +0200
Subject: [PATCH 46/46] fix python main module to be compiled with recent
 build scripts (#7876)

* fix python main module to be compiled with recent build scripts

* fixes for posix systems
---
 .gitignore |  2 ++
 darknet.py | 78 ++++++++++--------------------------------------------
 2 files changed, 16 insertions(+), 64 deletions(-)

diff --git a/.gitignore b/.gitignore
index 916cfb88461..a1d890429b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@
 *.dll
 *.lib
 *.dylib
+*.pyc
 mnist/
 data/
 caffe/
@@ -39,6 +40,7 @@ build/.ninja_log
 build/Makefile
 */vcpkg-manifest-install.log
 build.log
+__pycache__/

 # OS Generated #
 .DS_Store*
diff --git a/darknet.py b/darknet.py
index ad526f99d95..698f0469ad5 100644
--- a/darknet.py
+++ b/darknet.py
@@ -1,26 +1,13 @@
-#!python3
+#!/usr/bin/env python3
+
 """
 Python 3 wrapper for identifying objects in images

-Requires DLL compilation
-
-Both the GPU and no-GPU version should be compiled; the no-GPU version should be renamed "yolo_cpp_dll_nogpu.dll".
-
-On a GPU system, you can force CPU evaluation by any of:
-
-- Set global variable DARKNET_FORCE_CPU to True
-- Set environment variable CUDA_VISIBLE_DEVICES to -1
-- Set environment variable "FORCE_CPU" to "true"
-- Set environment variable "DARKNET_PATH" to path darknet lib .so (for Linux)
-
+Running the script requires opencv-python to be installed (`pip install opencv-python`)
 Directly viewing or returning bounding-boxed images requires scikit-image to be installed (`pip install scikit-image`)
-
-Original *nix 2.7: https://github.com/pjreddie/darknet/blob/0f110834f4e18b30d5f101bf8f1724c34b7b83db/python/darknet.py
-Windows Python 2.7 version: https://github.com/AlexeyAB/darknet/blob/fc496d52bf22a0bb257300d3c79be9cd80e722cb/build/darknet/x64/darknet.py
-
-@author: Philip Kahn
-@date: 20180503
+Use pip3 instead of pip on some systems to be sure to install modules for python3
 """
+
 from ctypes import *
 import math
 import random
@@ -178,51 +165,17 @@ def detect_image(network, class_names, image, thresh=.5, hier_thresh=.5, nms=.45
     return sorted(predictions, key=lambda x: x[1])


-# lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
-# lib = CDLL("libdarknet.so", RTLD_GLOBAL)
-hasGPU = True
-if os.name == "nt":
-    cwd = os.path.dirname(__file__)
-    os.environ['PATH'] = cwd + ';' + os.environ['PATH']
-    winGPUdll = os.path.join(cwd, "yolo_cpp_dll.dll")
-    winNoGPUdll = os.path.join(cwd, "yolo_cpp_dll_nogpu.dll")
-    envKeys = list()
-    for k, v in os.environ.items():
-        envKeys.append(k)
-    try:
-        try:
-            tmp = os.environ["FORCE_CPU"].lower()
-            if tmp in ["1", "true", "yes", "on"]:
-                raise ValueError("ForceCPU")
-            else:
-                print("Flag value {} not forcing CPU mode".format(tmp))
-        except KeyError:
-            # We never set the flag
-            if 'CUDA_VISIBLE_DEVICES' in envKeys:
-                if int(os.environ['CUDA_VISIBLE_DEVICES']) < 0:
-                    raise ValueError("ForceCPU")
-            try:
-                global DARKNET_FORCE_CPU
-                if DARKNET_FORCE_CPU:
-                    raise ValueError("ForceCPU")
-            except NameError as cpu_error:
-                print(cpu_error)
-        if not os.path.exists(winGPUdll):
-            raise ValueError("NoDLL")
-        lib = CDLL(winGPUdll, RTLD_GLOBAL)
-    except (KeyError, ValueError):
-        hasGPU = False
-        if os.path.exists(winNoGPUdll):
-            lib = CDLL(winNoGPUdll, RTLD_GLOBAL)
-            print("Notice: CPU-only mode")
-        else:
-            # Try the other way, in case no_gpu was compile but not renamed
-            lib = CDLL(winGPUdll, RTLD_GLOBAL)
-            print("Environment variables indicated a CPU run, but we didn't find {}. Trying a GPU run anyway.".format(winNoGPUdll))
+if os.name == "posix":
+    cwd = os.path.dirname(__file__)
+    lib = CDLL(cwd + "/libdarknet.so", RTLD_GLOBAL)
+elif os.name == "nt":
+    cwd = os.path.dirname(__file__)
+    os.environ['PATH'] = cwd + ';' + os.environ['PATH']
+    lib = CDLL("darknet.dll", RTLD_GLOBAL)
 else:
-    lib = CDLL(os.path.join(
-        os.environ.get('DARKNET_PATH', './'),
-        "libdarknet.so"), RTLD_GLOBAL)
+    print("Unsupported OS")
+    exit(1)
+
 lib.network_width.argtypes = [c_void_p]
 lib.network_width.restype = c_int
 lib.network_height.argtypes = [c_void_p]
@@ -235,10 +188,7 @@ def detect_image(network, class_names, image, thresh=.5, hier_thresh=.5, nms=.45
 predict.argtypes = [c_void_p, POINTER(c_float)]
 predict.restype = POINTER(c_float)

-if hasGPU:
-    set_gpu = lib.cuda_set_device
-    set_gpu.argtypes = [c_int]
-
+set_gpu = lib.cuda_set_device
 init_cpu = lib.init_cpu
 make_image = lib.make_image
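A hedged usage sketch of the simplified wrapper (not part of the patch): it assumes darknet was built as a shared library (`libdarknet.so` / `darknet.dll`) next to `darknet.py`, that `opencv-python` is installed, and that the cfg/data/weights paths below are placeholders. `load_network`, `network_width`, `network_height`, `make_image`, `copy_image_from_bytes`, `detect_image` and `free_image` are the helpers defined in `darknet.py` itself:

```python
import cv2
import darknet  # the module patched above; it now locates the shared library itself

# Placeholder paths -- substitute your own cfg/data/weights files.
network, class_names, colors = darknet.load_network(
    "cfg/yolov4.cfg", "cfg/coco.data", "yolov4.weights", batch_size=1)

w, h = darknet.network_width(network), darknet.network_height(network)
frame = cv2.cvtColor(cv2.imread("data/dog.jpg"), cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (w, h))

img = darknet.make_image(w, h, 3)
darknet.copy_image_from_bytes(img, frame.tobytes())
detections = darknet.detect_image(network, class_names, img, thresh=0.25)
darknet.free_image(img)

for label, confidence, bbox in detections:
    print(label, confidence, bbox)
```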