diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 077b3cf99f8..c8d65b796b4 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -1,6 +1,10 @@ name: Darknet Continuous Integration -on: [push, pull_request, workflow_dispatch] +on: + push: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' env: VCPKG_BINARY_SOURCES: 'clear;nuget,vcpkgbinarycache,readwrite' @@ -17,24 +21,13 @@ jobs: run: sudo apt install libopencv-dev - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0' run: | @@ -72,50 +65,38 @@ jobs: make clean - ubuntu-vcpkg-cuda: + ubuntu-vcpkg-opencv4-cuda: 
runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 + - uses: lukka/get-cmake@latest + - name: Update apt run: sudo apt update - name: Install dependencies run: sudo apt install yasm nasm - - uses: lukka/get-cmake@latest - - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > - git clone https://github.com/microsoft/vcpkg; - ./vcpkg/bootstrap-vcpkg.sh; + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; mono 
$(./vcpkg/vcpkg fetch nuget | tail -n 1) - sources add - -source "https://nuget.pkg.github.com/cenit/index.json" - -storepasswordincleartext - -name "vcpkgbinarycache" - -username "cenit" - -password "${{ secrets.GITHUB_TOKEN }}" + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json - name: 'Build' shell: pwsh @@ -124,7 +105,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -ForceStaticLib + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -144,6 +125,92 @@ jobs: path: ${{ github.workspace }}/uselib* + ubuntu-vcpkg-opencv3-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: 
"/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-vcpkg-opencv2-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateDARKNET + + ubuntu: runs-on: ubuntu-20.04 steps: @@ -163,7 +230,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: 
./build.ps1 -EnableOPENCV + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -196,24 +263,13 @@ jobs: - uses: lukka/get-cmake@latest - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' run: | - sudo apt update - sudo apt-get dist-upgrade -y - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda - export PATH=/usr/local/cuda/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH - nvcc --version - gcc --version - name: 'Build' shell: pwsh @@ -222,7 +278,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -EnableOPENCV -EnableCUDA + run: ./build.ps1 -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -251,7 
+307,29 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-setup-sh: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Setup' + shell: bash + run: ./scripts/setup.sh -InstallCUDA -BypassDRIVER osx-vcpkg: @@ -267,19 +345,18 @@ jobs: - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > - git clone https://github.com/microsoft/vcpkg; - ./vcpkg/bootstrap-vcpkg.sh; + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) - sources add - -source "https://nuget.pkg.github.com/cenit/index.json" - -storepasswordincleartext - -name "vcpkgbinarycache" - -username "cenit" - -password "${{ secrets.GITHUB_TOKEN }}" + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json - name: 'Build' shell: pwsh - run: ./build.ps1 -UseVCPKG + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -311,7 +388,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -EnableOPENCV + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -340,7 +417,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive 
-DoNotUpdateDARKNET win-vcpkg: @@ -353,19 +430,18 @@ jobs: - name: 'Setup vcpkg and NuGet artifacts backend' shell: bash run: > - git clone https://github.com/microsoft/vcpkg; - ./vcpkg/bootstrap-vcpkg.sh; + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; $(./vcpkg/vcpkg fetch nuget | tail -n 1) - sources add - -source "https://nuget.pkg.github.com/cenit/index.json" - -storepasswordincleartext - -name "vcpkgbinarycache" - -username "cenit" - -password "${{ secrets.GITHUB_TOKEN }}" + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json - name: 'Build' shell: pwsh - run: ./build.ps1 -UseVCPKG -EnableOPENCV + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -382,13 +458,35 @@ jobs: - uses: actions/upload-artifact@v2 with: name: darknet-vcpkg-${{ runner.os }} - path: ${{ runner.workspace }}/buildDirectory/Release/*.dll + path: ${{ github.workspace }}/build_release/*.dll - uses: actions/upload-artifact@v2 with: name: darknet-vcpkg-${{ runner.os }} path: ${{ github.workspace }}/uselib* + win-vcpkg-port: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -UseVCPKG -InstallDARKNETthroughVCPKG -ForceVCPKGDarknetHEAD -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET + + win-intlibs: runs-on: 
windows-latest steps: @@ -398,7 +496,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 + run: ./build.ps1 -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -422,6 +520,28 @@ jobs: path: ${{ github.workspace }}/uselib* + win-setup-ps1: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) + setapikey ${{ secrets.BAGET_API_KEY }} + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Setup' + shell: pwsh + run: ./scripts/setup.ps1 -InstallCUDA + + win-intlibs-cpp: runs-on: windows-latest steps: @@ -431,7 +551,19 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + + + win-csharp: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -EnableCSharpWrapper -DisableInteractive -DoNotUpdateDARKNET win-intlibs-cuda: @@ -439,22 +571,17 @@ jobs: steps: - uses: actions/checkout@v2 - name: 'Install CUDA' - run: | - choco install cuda --version=10.2.89.20191206 -y - $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." 
- Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - refreshenv + run: ./scripts/deploy-cuda.ps1 - uses: lukka/get-cmake@latest - name: 'Build' env: - CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - CUDA_PATH_V10_2: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" + CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3\\bin\\nvcc.exe" shell: pwsh - run: ./build.ps1 -EnableCUDA + run: ./build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET mingw: diff --git a/.github/workflows/on_pr.yml b/.github/workflows/on_pr.yml new file mode 100644 index 00000000000..9f0a664ebcb --- /dev/null +++ b/.github/workflows/on_pr.yml @@ -0,0 +1,429 @@ +name: Darknet Pull Requests + +on: [pull_request] + +env: + VCPKG_BINARY_SOURCES: 'clear;nuget,vcpkgbinarycache,read' + +jobs: + ubuntu-makefile: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 DEBUG=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 
OPENCV=0 DEBUG=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 AVX=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=0 AVX=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=0 CUDNN=0 OPENCV=1' + run: | + make LIBSO=1 GPU=0 CUDNN=0 OPENCV=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 -j 8 + make clean + - name: 'LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 USE_CPP=1' + run: | + export PATH=/usr/local/cuda/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH + make LIBSO=1 GPU=1 CUDNN=1 OPENCV=1 CUDNN_HALF=1 USE_CPP=1 -j 8 + make clean + + + ubuntu-vcpkg-opencv4-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - 
name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-vcpkg-opencv3-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 3 -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-vcpkg-opencv2-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install yasm nasm + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - 
name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -ForceOpenCVVersion 2 -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-cuda: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: Update apt + run: sudo apt update + - name: Install dependencies + run: sudo apt install libopencv-dev + + - uses: lukka/get-cmake@latest + + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.sh + + - name: 'Create softlinks for CUDA' + run: | + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so 
/usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + + - name: 'Build' + shell: pwsh + env: + CUDACXX: "/usr/local/cuda/bin/nvcc" + CUDA_PATH: "/usr/local/cuda" + CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" + LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" + run: ./build.ps1 -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-no-ocv-cpp: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + + + ubuntu-setup-sh: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Setup' + shell: bash + run: ./scripts/setup.sh -InstallCUDA -BypassDRIVER + + + osx-vcpkg: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: brew install libomp yasm nasm + + - uses: lukka/get-cmake@latest + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -DisableInteractive -DoNotUpdateDARKNET + + + osx: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + + - name: Install dependencies + run: brew install 
opencv libomp + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET + + + osx-no-ocv-no-omp-cpp: + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + + + win-vcpkg: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET + + + win-intlibs: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -DisableInteractive -DoNotUpdateDARKNET + + + win-setup-ps1: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - name: 'Setup vcpkg and NuGet artifacts backend' + shell: bash + run: > + git clone https://github.com/microsoft/vcpkg ; + ./vcpkg/bootstrap-vcpkg.sh ; + $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add + -Name "vcpkgbinarycache" + -Source http://93.49.111.10:5555/v3/index.json + + - name: 'Setup' + shell: pwsh + run: ./scripts/setup.ps1 -InstallCUDA + + + win-intlibs-cpp: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET + + + win-csharp: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + shell: pwsh + run: ./build.ps1 
-EnableCSharpWrapper -DisableInteractive -DoNotUpdateDARKNET + + + win-intlibs-cuda: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - name: 'Install CUDA' + run: ./scripts/deploy-cuda.ps1 + + - uses: lukka/get-cmake@latest + + - name: 'Build' + env: + CUDA_PATH: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3\\bin\\nvcc.exe" + shell: pwsh + run: ./build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + + + mingw: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: 'Build with CMake' + uses: lukka/run-cmake@v3 + with: + cmakeListsOrSettingsJson: CMakeListsTxtAdvanced + cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' + useVcpkgToolchainFile: true + buildDirectory: '${{ runner.workspace }}/buildDirectory' + cmakeAppendedArgs: "-G\"MinGW Makefiles\" -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=OFF -DENABLE_CUDNN=OFF -DENABLE_OPENCV=OFF" + cmakeBuildType: 'Release' + buildWithCMakeArgs: '--config Release --target install' diff --git a/.github/workflows/rebase.yml b/.github/workflows/rebase.yml new file mode 100644 index 00000000000..251a259ffcf --- /dev/null +++ b/.github/workflows/rebase.yml @@ -0,0 +1,19 @@ +name: Automatic Rebase +on: + issue_comment: + types: [created] +jobs: + rebase: + name: Rebase + if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'CONTRIBUTOR') + runs-on: ubuntu-latest + steps: + - name: Checkout the latest code + uses: actions/checkout@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 # otherwise, you will fail to push refs to dest repo + - name: Automatic Rebase + 
uses: cirrus-actions/rebase@1.5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 174f0b5a378..a1d890429b3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ *.dll *.lib *.dylib +*.pyc mnist/ data/ caffe/ @@ -22,6 +23,8 @@ cfg/ temp/ build/darknet/* build_*/ +ninja/ +ninja.zip vcpkg_installed/ !build/darknet/YoloWrapper.cs .fuse* @@ -36,6 +39,8 @@ build/.ninja_deps build/.ninja_log build/Makefile */vcpkg-manifest-install.log +build.log +__pycache__/ # OS Generated # .DS_Store* diff --git a/.travis.yml b/.travis.yml index 447a72a179d..f208498dbcd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,32 +16,6 @@ matrix: - additional_defines=" -DENABLE_CUDA=OFF -DENABLE_CUDNN=OFF -DENABLE_OPENCV=OFF" - MATRIX_EVAL="" - - os: osx - compiler: gcc - name: macOS - gcc (llvm backend) - opencv@2 - osx_image: xcode12.3 - env: - - OpenCV_DIR="/usr/local/opt/opencv@2/" - - additional_defines="-DOpenCV_DIR=${OpenCV_DIR} -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv@2" - - - os: osx - compiler: gcc - name: macOS - gcc (llvm backend) - opencv@3 - osx_image: xcode12.3 - env: - - OpenCV_DIR="/usr/local/opt/opencv@3/" - - additional_defines="-DOpenCV_DIR=${OpenCV_DIR} -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv@3" - - - os: osx - compiler: gcc - name: macOS - gcc (llvm backend) - opencv(latest) - osx_image: xcode12.3 - env: - - additional_defines=" -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv" - - os: osx compiler: clang name: macOS - clang @@ -58,40 +32,6 @@ matrix: - additional_defines="-DBUILD_AS_CPP:BOOL=TRUE -DENABLE_CUDA=OFF -DENABLE_CUDNN=OFF -DENABLE_OPENCV=OFF" - MATRIX_EVAL="" - - os: osx - compiler: clang - name: macOS - clang - opencv@2 - osx_image: xcode12.3 - env: - - OpenCV_DIR="/usr/local/opt/opencv@2/" - - additional_defines="-DOpenCV_DIR=${OpenCV_DIR} -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv@2" - - - os: osx - compiler: clang - name: macOS - clang - opencv@3 - osx_image: 
xcode12.3 - env: - - OpenCV_DIR="/usr/local/opt/opencv@3/" - - additional_defines="-DOpenCV_DIR=${OpenCV_DIR} -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv@3" - - - os: osx - compiler: clang - name: macOS - clang - opencv(latest) - osx_image: xcode12.3 - env: - - additional_defines=" -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv" - - - os: osx - compiler: clang - name: macOS - clang - opencv(latest) - libomp - osx_image: xcode12.3 - env: - - additional_defines=" -DENABLE_CUDA=OFF" - - MATRIX_EVAL="brew install opencv libomp" - - os: linux compiler: clang dist: bionic diff --git a/CMakeLists.txt b/CMakeLists.txt index 00f446fcccf..0e1abf32d9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,8 @@ set(Darknet_PATCH_VERSION 5) set(Darknet_TWEAK_VERSION 4) set(Darknet_VERSION ${Darknet_MAJOR_VERSION}.${Darknet_MINOR_VERSION}.${Darknet_PATCH_VERSION}.${Darknet_TWEAK_VERSION}) +message("Darknet_VERSION: ${Darknet_VERSION}") + option(CMAKE_VERBOSE_MAKEFILE "Create verbose makefile" ON) option(CUDA_VERBOSE_BUILD "Create verbose CUDA build" ON) option(BUILD_SHARED_LIBS "Create dark as a shared library" ON) @@ -19,30 +21,49 @@ option(ENABLE_CUDNN "Enable CUDNN" ON) option(ENABLE_CUDNN_HALF "Enable CUDNN Half precision" ON) option(ENABLE_ZED_CAMERA "Enable ZED Camera support" ON) option(ENABLE_VCPKG_INTEGRATION "Enable VCPKG integration" ON) +option(ENABLE_CSHARP_WRAPPER "Enable building a csharp wrapper" OFF) +option(VCPKG_BUILD_OPENCV_WITH_CUDA "Build OpenCV with CUDA extension integration" ON) +option(VCPKG_USE_OPENCV2 "Use legacy OpenCV 2" OFF) +option(VCPKG_USE_OPENCV3 "Use legacy OpenCV 3" OFF) +option(VCPKG_USE_OPENCV4 "Use OpenCV 4" ON) -if(ENABLE_OPENCV_WITH_CUDA AND NOT APPLE) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda") +if(VCPKG_USE_OPENCV4 AND VCPKG_USE_OPENCV2) + message(STATUS "You required vcpkg feature related to OpenCV 2 but forgot to turn off those for OpenCV 4, doing that for you") + set(VCPKG_USE_OPENCV4 OFF CACHE 
BOOL "Use OpenCV 4" FORCE) +endif() +if(VCPKG_USE_OPENCV4 AND VCPKG_USE_OPENCV3) + message(STATUS "You required vcpkg feature related to OpenCV 3 but forgot to turn off those for OpenCV 4, doing that for you") + set(VCPKG_USE_OPENCV4 OFF CACHE BOOL "Use OpenCV 4" FORCE) +endif() +if(VCPKG_USE_OPENCV2 AND VCPKG_USE_OPENCV3) + message(STATUS "You required vcpkg features related to both OpenCV 2 and OpenCV 3. Impossible to satisfy, keeping only OpenCV 3") + set(VCPKG_USE_OPENCV2 OFF CACHE BOOL "Use legacy OpenCV 2" FORCE) endif() + if(ENABLE_CUDA AND NOT APPLE) list(APPEND VCPKG_MANIFEST_FEATURES "cuda") endif() -if(ENABLE_OPENCV) - list(APPEND VCPKG_MANIFEST_FEATURES "opencv-base") -endif() if(ENABLE_CUDNN AND ENABLE_CUDA AND NOT APPLE) list(APPEND VCPKG_MANIFEST_FEATURES "cudnn") endif() - -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG TRUE) - if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - set(CMAKE_COMPILER_IS_CLANG TRUE) +if(ENABLE_OPENCV) + if(VCPKG_BUILD_OPENCV_WITH_CUDA AND NOT APPLE) + if(VCPKG_USE_OPENCV4) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda") + elseif(VCPKG_USE_OPENCV3) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv3-cuda") + elseif(VCPKG_USE_OPENCV2) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv2-cuda") + endif() else() - set(CMAKE_COMPILER_IS_CLANG FALSE) + if(VCPKG_USE_OPENCV4) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv-base") + elseif(VCPKG_USE_OPENCV3) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv3-base") + elseif(VCPKG_USE_OPENCV2) + list(APPEND VCPKG_MANIFEST_FEATURES "opencv2-base") + endif() endif() -else() - set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG FALSE) - set(CMAKE_COMPILER_IS_CLANG FALSE) endif() if(NOT CMAKE_HOST_SYSTEM_PROCESSOR AND NOT WIN32) @@ -87,6 +108,18 @@ enable_language(CXX) set(CMAKE_CXX_STANDARD 11) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH}) +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" MATCHES 
"Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG TRUE) + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "clang") + set(CMAKE_COMPILER_IS_CLANG TRUE) + else() + set(CMAKE_COMPILER_IS_CLANG FALSE) + endif() +else() + set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG FALSE) + set(CMAKE_COMPILER_IS_CLANG FALSE) +endif() + set(default_build_type "Release") if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to '${default_build_type}' as none was specified.") @@ -201,12 +234,14 @@ endif() set(ADDITIONAL_CXX_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-deprecated-declarations -Wno-write-strings") set(ADDITIONAL_C_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-deprecated-declarations -Wno-write-strings") +if(UNIX AND BUILD_SHARED_LIBS AND NOT CMAKE_COMPILER_IS_CLANG) + set(SHAREDLIB_CXX_FLAGS "-Wl,-Bsymbolic") + set(SHAREDLIB_C_FLAGS "-Wl,-Bsymbolic") +endif() if(MSVC) set(ADDITIONAL_CXX_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast") set(ADDITIONAL_C_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast") - set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${CMAKE_C_FLAGS}") string(REGEX REPLACE "/O2" "/Ox" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) string(REGEX REPLACE "/O2" "/Ox" CMAKE_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE}) endif() @@ -218,8 +253,6 @@ if(CMAKE_COMPILER_IS_GNUCC_OR_CLANG) set(CMAKE_C_FLAGS "-pthread ${CMAKE_C_FLAGS}") endif() endif() - set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${CMAKE_C_FLAGS}") string(REGEX REPLACE "-O0" "-Og" CMAKE_CXX_FLAGS_DEBUG 
${CMAKE_CXX_FLAGS_DEBUG}) string(REGEX REPLACE "-O3" "-Ofast" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) string(REGEX REPLACE "-O0" "-Og" CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG}) @@ -230,18 +263,23 @@ if(CMAKE_COMPILER_IS_GNUCC_OR_CLANG) endif() endif() +set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${SHAREDLIB_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") +set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${SHAREDLIB_C_FLAGS} ${CMAKE_C_FLAGS}") + if(OpenCV_FOUND) - if(ENABLE_CUDA AND NOT OpenCV_CUDA_VERSION) - set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE) - message(STATUS " -> darknet is fine for now, but uselib_track has been disabled!") - message(STATUS " -> Please rebuild OpenCV from sources with CUDA support to enable it") - elseif(ENABLE_CUDA AND OpenCV_CUDA_VERSION) + if(ENABLE_CUDA AND OpenCV_CUDA_VERSION) if(TARGET opencv_cudaoptflow) list(APPEND OpenCV_LINKED_COMPONENTS "opencv_cudaoptflow") endif() if(TARGET opencv_cudaimgproc) list(APPEND OpenCV_LINKED_COMPONENTS "opencv_cudaimgproc") endif() + elseif(ENABLE_CUDA AND NOT OpenCV_CUDA_VERSION) + set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE) + message(STATUS " -> darknet is fine for now, but uselib_track has been disabled!") + message(STATUS " -> Please rebuild OpenCV from sources with CUDA support to enable it") + else() + set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE) endif() endif() @@ -539,3 +577,7 @@ install(FILES "${PROJECT_BINARY_DIR}/DarknetConfigVersion.cmake" DESTINATION "${INSTALL_CMAKE_DIR}" ) + +if(ENABLE_CSHARP_WRAPPER) + add_subdirectory(src/csharp) +endif() diff --git a/Makefile b/Makefile index fc851c50f4a..a0560789e2c 100644 --- a/Makefile +++ b/Makefile @@ -151,7 +151,7 @@ LDFLAGS+= -L/usr/local/zed/lib -lsl_zed endif endif -OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o 
matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o +OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o representation_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o diff --git a/README.md b/README.md index 2de8c7bf980..bb545cf097a 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,18 @@ Paper YOLO v4: https://arxiv.org/abs/2004.10934 Paper Scaled YOLO v4: https://arxiv.org/abs/2011.08036 use to reproduce results: [ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4) -More 
details in articles on medium: - * [Scaled_YOLOv4](https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8) - * [YOLOv4](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) +More details in articles on medium: + +- [Scaled_YOLOv4](https://alexeyab84.medium.com/scaled-yolo-v4-is-the-best-neural-network-for-object-detection-on-ms-coco-dataset-39dfa22fa982?source=friends_link&sk=c8553bfed861b1a7932f739d26f487c8) +- [YOLOv4](https://medium.com/@alexeyab84/yolov4-the-most-accurate-real-time-neural-network-on-ms-coco-dataset-73adfd3602fe?source=friends_link&sk=6039748846bbcf1d960c3061542591d7) Manual: https://github.com/AlexeyAB/darknet/wiki -Discussion: - - [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) - - [Google-groups](https://groups.google.com/forum/#!forum/darknet) - - [Discord](https://discord.gg/zSq8rtW) +Discussion: + +- [Reddit](https://www.reddit.com/r/MachineLearning/comments/gydxzd/p_yolov4_the_most_accurate_realtime_neural/) +- [Google-groups](https://groups.google.com/forum/#!forum/darknet) +- [Discord](https://discord.gg/zSq8rtW) About Darknet framework: http://pjreddie.com/darknet/ @@ -26,76 +28,77 @@ About Darknet framework: http://pjreddie.com/darknet/ [![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE) [![DOI](https://zenodo.org/badge/75388965.svg)](https://zenodo.org/badge/latestdoi/75388965) [![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2004.10934-B31B1B.svg)](https://arxiv.org/abs/2004.10934) +[![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2011.08036-B31B1B.svg)](https://arxiv.org/abs/2011.08036) 
[![colab](https://user-images.githubusercontent.com/4096485/86174089-b2709f80-bb29-11ea-9faf-3d8dc668a1a5.png)](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) [![colab](https://user-images.githubusercontent.com/4096485/86174097-b56b9000-bb29-11ea-9240-c17f6bacfc34.png)](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg) - -* [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) -* [Requirements (and how to install dependecies)](#requirements) -* [Pre-trained models](#pre-trained-models) -* [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) -* [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) -* [Yolo v4 in other frameworks (TensorRT, TensorFlow, PyTorch, OpenVINO, OpenCV-dnn, TVM,...)](#yolo-v4-in-other-frameworks) -* [Datasets](#datasets) +- [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) +- [Requirements (and how to install dependencies)](#requirements-for-windows-linux-and-macos) +- [Pre-trained models](#pre-trained-models) +- [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) +- [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) +- [Yolo v4 in other frameworks (TensorRT, TensorFlow, PyTorch, OpenVINO, OpenCV-dnn, TVM,...)](#yolo-v4-in-other-frameworks) +- [Datasets](#datasets) - [Yolo v4, v3 and v2 for Windows and Linux](#yolo-v4-v3-and-v2-for-windows-and-linux) - [(neural networks for object detection)](#neural-networks-for-object-detection) - - [GeForce RTX 2080 Ti:](#geforce-rtx-2080-ti) + - [GeForce RTX 2080 Ti](#geforce-rtx-2080-ti) - [Youtube video of results](#youtube-video-of-results) - [How to evaluate AP of YOLOv4 on the MS COCO evaluation 
server](#how-to-evaluate-ap-of-yolov4-on-the-ms-coco-evaluation-server) - [How to evaluate FPS of YOLOv4 on GPU](#how-to-evaluate-fps-of-yolov4-on-gpu) - [Pre-trained models](#pre-trained-models) - - [Requirements](#requirements) + - [Requirements for Windows, Linux and macOS](#requirements-for-windows-linux-and-macos) - [Yolo v4 in other frameworks](#yolo-v4-in-other-frameworks) - [Datasets](#datasets) - [Improvements in this repository](#improvements-in-this-repository) - [How to use on the command line](#how-to-use-on-the-command-line) - [For using network video-camera mjpeg-stream with any Android smartphone](#for-using-network-video-camera-mjpeg-stream-with-any-android-smartphone) - [How to compile on Linux/macOS (using `CMake`)](#how-to-compile-on-linuxmacos-using-cmake) - - [Using `vcpkg`](#using-vcpkg) - - [Using libraries manually provided](#using-libraries-manually-provided) + - [Using also PowerShell](#using-also-powershell) - [How to compile on Linux (using `make`)](#how-to-compile-on-linux-using-make) - [How to compile on Windows (using `CMake`)](#how-to-compile-on-windows-using-cmake) - [How to compile on Windows (using `vcpkg`)](#how-to-compile-on-windows-using-vcpkg) - [How to train with multi-GPU](#how-to-train-with-multi-gpu) - [How to train (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) - - [How to train tiny-yolo (to detect your custom objects):](#how-to-train-tiny-yolo-to-detect-your-custom-objects) - - [When should I stop training:](#when-should-i-stop-training) - - [Custom object detection:](#custom-object-detection) - - [How to improve object detection:](#how-to-improve-object-detection) - - [How to mark bounded boxes of objects and create annotation files:](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) + - [How to train tiny-yolo (to detect your custom objects)](#how-to-train-tiny-yolo-to-detect-your-custom-objects) + - [When should I stop training](#when-should-i-stop-training) + - 
[Custom object detection](#custom-object-detection) + - [How to improve object detection](#how-to-improve-object-detection) + - [How to mark bounded boxes of objects and create annotation files](#how-to-mark-bounded-boxes-of-objects-and-create-annotation-files) - [How to use Yolo as DLL and SO libraries](#how-to-use-yolo-as-dll-and-so-libraries) +- [Citation](#citation) -![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) +![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png) ![scaled_yolov4](https://user-images.githubusercontent.com/4096485/112776361-281d8380-9048-11eb-8083-8728b12dcd55.png) AP50:95 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2011.08036 ---- -![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 - +![modern_gpus](https://user-images.githubusercontent.com/4096485/82835867-f1c62380-9ecd-11ea-9134-1598ed2abc4b.png) AP50:95 / AP50 - FPS (Tesla V100) Paper: https://arxiv.org/abs/2004.10934 tkDNN-TensorRT accelerates YOLOv4 **~2x** times for batch=1 and **3x-4x** times for batch=4. 
-* tkDNN: https://github.com/ceccocats/tkDNN -* OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf - -#### GeForce RTX 2080 Ti: -| Network Size | Darknet, FPS (avg)| tkDNN TensorRT FP32, FPS | tkDNN TensorRT FP16, FPS | OpenCV FP16, FPS | tkDNN TensorRT FP16 batch=4, FPS | OpenCV FP16 batch=4, FPS | tkDNN Speedup | -|:-----:|:--------:|--------:|--------:|--------:|--------:|--------:|------:| -|320 | 100 | 116 | **202** | 183 | 423 | **430** | **4.3x** | -|416 | 82 | 103 | **162** | 159 | 284 | **294** | **3.6x** | -|512 | 69 | 91 | 134 | **138** | 206 | **216** | **3.1x** | -|608 | 53 | 62 | 103 | **115**| 150 | **150** | **2.8x** | -|Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | -|Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | - -* Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) -* Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) -* CSPNet: [paper](https://arxiv.org/abs/1911.11929) and [map_fps](https://user-images.githubusercontent.com/4096485/71702416-6645dc00-2de0-11ea-8d65-de7d4b604021.png) comparison: https://github.com/WongKinYiu/CrossStagePartialNetworks -* Yolo v3 on MS COCO: [Speed / Accuracy (mAP@0.5) chart](https://user-images.githubusercontent.com/4096485/52151356-e5d4a380-2683-11e9-9d7d-ac7bc192c477.jpg) -* Yolo v3 on MS COCO (Yolo v3 vs RetinaNet) - Figure 3: https://arxiv.org/pdf/1804.02767v1.pdf -* Yolo v2 on Pascal VOC 2007: https://hsto.org/files/a24/21e/068/a2421e0689fb43f08584de9d44c2215f.jpg -* Yolo v2 on Pascal VOC 2012 (comp4): https://hsto.org/files/3a6/fdf/b53/3a6fdfb533f34cee9b52bdd9bb0b19d9.jpg + +- tkDNN: https://github.com/ceccocats/tkDNN +- OpenCV: https://gist.github.com/YashasSamaga/48bdb167303e10f4d07b754888ddbdcf + +### GeForce RTX 2080 Ti + +| Network Size | Darknet, FPS (avg) | tkDNN 
TensorRT FP32, FPS | tkDNN TensorRT FP16, FPS | OpenCV FP16, FPS | tkDNN TensorRT FP16 batch=4, FPS | OpenCV FP16 batch=4, FPS | tkDNN Speedup | +|:--------------------------:|:------------------:|-------------------------:|-------------------------:|-----------------:|---------------------------------:|-------------------------:|--------------:| +|320 | 100 | 116 | **202** | 183 | 423 | **430** | **4.3x** | +|416 | 82 | 103 | **162** | 159 | 284 | **294** | **3.6x** | +|512 | 69 | 91 | 134 | **138** | 206 | **216** | **3.1x** | +|608 | 53 | 62 | 103 | **115** | 150 | **150** | **2.8x** | +|Tiny 416 | 443 | 609 | **790** | 773 | **1774** | 1353 | **3.5x** | +|Tiny 416 CPU Core i7 7700HQ | 3.4 | - | - | 42 | - | 39 | **12x** | + +- Yolo v4 Full comparison: [map_fps](https://user-images.githubusercontent.com/4096485/80283279-0e303e00-871f-11ea-814c-870967d77fd1.png) +- Yolo v4 tiny comparison: [tiny_fps](https://user-images.githubusercontent.com/4096485/85734112-6e366700-b705-11ea-95d1-fcba0de76d72.png) +- CSPNet: [paper](https://arxiv.org/abs/1911.11929) and [map_fps](https://user-images.githubusercontent.com/4096485/71702416-6645dc00-2de0-11ea-8d65-de7d4b604021.png) comparison: https://github.com/WongKinYiu/CrossStagePartialNetworks +- Yolo v3 on MS COCO: [Speed / Accuracy (mAP@0.5) chart](https://user-images.githubusercontent.com/4096485/52151356-e5d4a380-2683-11e9-9d7d-ac7bc192c477.jpg) +- Yolo v3 on MS COCO (Yolo v3 vs RetinaNet) - Figure 3: https://arxiv.org/pdf/1804.02767v1.pdf +- Yolo v2 on Pascal VOC 2007: https://hsto.org/files/a24/21e/068/a2421e0689fb43f08584de9d44c2215f.jpg +- Yolo v2 on Pascal VOC 2012 (comp4): https://hsto.org/files/3a6/fdf/b53/3a6fdfb533f34cee9b52bdd9bb0b19d9.jpg #### Youtube video of results @@ -107,7 +110,7 @@ Others: https://www.youtube.com/user/pjreddie/videos #### How to evaluate AP of YOLOv4 on the MS COCO evaluation server 1. 
Download and unzip test-dev2017 dataset from MS COCO server: http://images.cocodataset.org/zips/test2017.zip -2. Download list of images for Detection taks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt +2. Download list of images for Detection tasks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt 3. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) 4. Content of the file `cfg/coco.data` should be @@ -132,9 +135,9 @@ eval=coco 3. Get any .avi/.mp4 video file (preferably not more than 1920x1080 to avoid bottlenecks in CPU performance) 4. Run one of two commands and look at the AVG FPS: -* include video_capturing + NMS + drawing_bboxes: +- include video_capturing + NMS + drawing_bboxes: `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -dont_show -ext_output` -* exclude video_capturing + NMS + drawing_bboxes: +- exclude video_capturing + NMS + drawing_bboxes: `./darknet detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -benchmark` #### Pre-trained models @@ -143,52 +146,58 @@ There are weights-file for different cfg-files (trained for MS COCO dataset): FPS on RTX 2070 (R) and Tesla V100 (V): -* [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - 640x640 - **67.9% mAP@0.5 (49.4% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights) - * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166 +- 
[yolov4-p6.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-p6.cfg) - 1280x1280 - **72.1% mAP@0.5 (54.0% AP@0.5:0.95) - 32(V) FPS** - xxx BFlops (xxx FMA) - 487 MB: [yolov4-p6.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-p6.weights) + - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-p6.conv.289 + +- [yolov4-p5.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-p5.cfg) - 896x896 - **70.0% mAP@0.5 (51.6% AP@0.5:0.95) - 43(V) FPS** - xxx BFlops (xxx FMA) - 271 MB: [yolov4-p5.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-p5.weights) + - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-p5.conv.232 -* [yolov4-csp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg) - 202 MB: [yolov4-csp.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights) paper [Scaled Yolo v4](https://arxiv.org/abs/2011.08036) +- [yolov4x-mish.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4x-mish.cfg) - 640x640 - **68.5% mAP@0.5 (50.1% AP@0.5:0.95) - 23(R) FPS / 50(V) FPS** - 221 BFlops (110 FMA) - 381 MB: [yolov4x-mish.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.weights) + - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4x-mish.conv.166 + +- [yolov4-csp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-csp.cfg) - 202 MB: [yolov4-csp.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.weights) paper [Scaled Yolo v4](https://arxiv.org/abs/2011.08036) just change `width=` and `height=` parameters in `yolov4-csp.cfg` file and use the same 
`yolov4-csp.weights` file for all cases: - * `width=640 height=640` in cfg: **66.2% mAP@0.5 (47.5% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops - * `width=512 height=512` in cfg: **64.8% mAP@0.5 (46.2% AP@0.5:0.95) - 93(V) FPS** - 77 (39 FMA) BFlops - * pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142 - -* [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) + - `width=640 height=640` in cfg: **67.4% mAP@0.5 (48.7% AP@0.5:0.95) - 70(V) FPS** - 120 (60 FMA) BFlops + - `width=512 height=512` in cfg: **64.8% mAP@0.5 (46.2% AP@0.5:0.95) - 93(V) FPS** - 77 (39 FMA) BFlops + - pre-trained weights for training: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-csp.conv.142 + +- [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) - 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) paper [Yolo v4](https://arxiv.org/abs/2004.10934) just change `width=` and `height=` parameters in `yolov4.cfg` file and use the same `yolov4.weights` file for all cases: - * `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops - * `width=512 height=512` in cfg: **64.9% mAP@0.5 (43.0% AP@0.5:0.95) - 45(R) FPS / 83(V) FPS** - 91.1 BFlops - * `width=416 height=416` in cfg: **62.8% mAP@0.5 (41.2% AP@0.5:0.95) - 55(R) FPS / 96(V) FPS** - 60.1 BFlops - * `width=320 height=320` in cfg: **60% mAP@0.5 ( 38% AP@0.5:0.95) - 
63(R) FPS / 123(V) FPS** - 35.5 BFlops + - `width=608 height=608` in cfg: **65.7% mAP@0.5 (43.5% AP@0.5:0.95) - 34(R) FPS / 62(V) FPS** - 128.5 BFlops + - `width=512 height=512` in cfg: **64.9% mAP@0.5 (43.0% AP@0.5:0.95) - 45(R) FPS / 83(V) FPS** - 91.1 BFlops + - `width=416 height=416` in cfg: **62.8% mAP@0.5 (41.2% AP@0.5:0.95) - 55(R) FPS / 96(V) FPS** - 60.1 BFlops + - `width=320 height=320` in cfg: **60% mAP@0.5 ( 38% AP@0.5:0.95) - 63(R) FPS / 123(V) FPS** - 35.5 BFlops -* [yolov4-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg) - **40.2% mAP@0.5 - 371(1080Ti) FPS / 330(RTX2070) FPS** - 6.9 BFlops - 23.1 MB: [yolov4-tiny.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights) +- [yolov4-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg) - **40.2% mAP@0.5 - 371(1080Ti) FPS / 330(RTX2070) FPS** - 6.9 BFlops - 23.1 MB: [yolov4-tiny.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights) -* [enet-coco.cfg (EfficientNetB0-Yolov3)](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/enet-coco.cfg) - **45.5% mAP@0.5 - 55(R) FPS** - 3.7 BFlops - 18.3 MB: [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) +- [enet-coco.cfg (EfficientNetB0-Yolov3)](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/enet-coco.cfg) - **45.5% mAP@0.5 - 55(R) FPS** - 3.7 BFlops - 18.3 MB: [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) -* [yolov3-openimages.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-openimages.cfg) - 247 MB - 18(R) FPS - OpenImages dataset: [yolov3-openimages.weights](https://pjreddie.com/media/files/yolov3-openimages.weights) +- [yolov3-openimages.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-openimages.cfg) - 247 MB - 
18(R) FPS - OpenImages dataset: [yolov3-openimages.weights](https://pjreddie.com/media/files/yolov3-openimages.weights)
CLICK ME - Yolo v3 models -* [csresnext50-panet-spp-original-optimal.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp-original-optimal.cfg) - **65.4% mAP@0.5 (43.2% AP@0.5:0.95) - 32(R) FPS** - 100.5 BFlops - 217 MB: [csresnext50-panet-spp-original-optimal_final.weights](https://drive.google.com/open?id=1_NnfVgj0EDtb_WLNoXV8Mo7WKgwdYZCc) +- [csresnext50-panet-spp-original-optimal.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp-original-optimal.cfg) - **65.4% mAP@0.5 (43.2% AP@0.5:0.95) - 32(R) FPS** - 100.5 BFlops - 217 MB: [csresnext50-panet-spp-original-optimal_final.weights](https://drive.google.com/open?id=1_NnfVgj0EDtb_WLNoXV8Mo7WKgwdYZCc) -* [yolov3-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg) - **60.6% mAP@0.5 - 38(R) FPS** - 141.5 BFlops - 240 MB: [yolov3-spp.weights](https://pjreddie.com/media/files/yolov3-spp.weights) +- [yolov3-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg) - **60.6% mAP@0.5 - 38(R) FPS** - 141.5 BFlops - 240 MB: [yolov3-spp.weights](https://pjreddie.com/media/files/yolov3-spp.weights) -* [csresnext50-panet-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp.cfg) - **60.0% mAP@0.5 - 44 FPS** - 71.3 BFlops - 217 MB: [csresnext50-panet-spp_final.weights](https://drive.google.com/file/d/1aNXdM8qVy11nqTcd2oaVB3mf7ckr258-/view?usp=sharing) +- [csresnext50-panet-spp.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/csresnext50-panet-spp.cfg) - **60.0% mAP@0.5 - 44 FPS** - 71.3 BFlops - 217 MB: [csresnext50-panet-spp_final.weights](https://drive.google.com/file/d/1aNXdM8qVy11nqTcd2oaVB3mf7ckr258-/view?usp=sharing) -* [yolov3.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg) - **55.3% mAP@0.5 - 66(R) FPS** - 65.9 BFlops - 236 MB: [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) +- 
[yolov3.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg) - **55.3% mAP@0.5 - 66(R) FPS** - 65.9 BFlops - 236 MB: [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) -* [yolov3-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg) - **33.1% mAP@0.5 - 345(R) FPS** - 5.6 BFlops - 33.7 MB: [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) +- [yolov3-tiny.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg) - **33.1% mAP@0.5 - 345(R) FPS** - 5.6 BFlops - 33.7 MB: [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) -* [yolov3-tiny-prn.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny-prn.cfg) - **33.1% mAP@0.5 - 370(R) FPS** - 3.5 BFlops - 18.8 MB: [yolov3-tiny-prn.weights](https://drive.google.com/file/d/18yYZWyKbo4XSDVyztmsEcF9B_6bxrhUY/view?usp=sharing) +- [yolov3-tiny-prn.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny-prn.cfg) - **33.1% mAP@0.5 - 370(R) FPS** - 3.5 BFlops - 18.8 MB: [yolov3-tiny-prn.weights](https://drive.google.com/file/d/18yYZWyKbo4XSDVyztmsEcF9B_6bxrhUY/view?usp=sharing)
CLICK ME - Yolo v2 models -* `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights -* `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights -* `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights -* `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights -* `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights +- `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights +- `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights +- `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights +- `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights +- `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights
@@ -196,73 +205,77 @@ Put it near compiled: darknet.exe You can get cfg-files by path: `darknet/cfg/` -### Requirements +### Requirements for Windows, Linux and macOS -* **CMake >= 3.18**: https://cmake.org/download/ -* **Powershell** (already installed on windows): https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell -* **CUDA >= 10.2**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) -* **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) -* **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... 
as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) -* **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported +- **CMake >= 3.18**: https://cmake.org/download/ +- **Powershell** (already installed on windows): https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell +- **CUDA >= 10.2**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) +- **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) +- **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... 
as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) +- **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported ### Yolo v4 in other frameworks -* **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4 -* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Andriod: https://github.com/hunglc007/tensorflow-yolov4-tflite +- **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4 +- **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Android: https://github.com/hunglc007/tensorflow-yolov4-tflite Official TF models: https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/yolo For YOLOv4 - convert `yolov4.weights`/`cfg` files to `yolov4.pb` by using [TNTWEN](https://github.com/TNTWEN/OpenVINO-YOLOV4) project, and to `yolov4.tflite` [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) -* **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) -* **Intel OpenVINO 2021.2:** supports YOLOv4 (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): https://devmesh.intel.com/projects/openvino-yolov4-49c756 read this [manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old 
[manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) -* **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn -* **PyTorch > ONNX**: - * [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) - * [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) - * [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) - * [YOLOv5](https://github.com/ultralytics/yolov5) -* **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ -* **TensorRT** YOLOv4 on TensorRT+tkDNN: https://github.com/ceccocats/tkDNN - For YOLOv3 (-70% faster inference): [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) read [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). [jkjung-avt/tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos) or [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) implemented yolov3-spp, yolov4, etc. 
-* **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo -* **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt -* **Xilinx Zynq Ultrascale+ Deep Learning Processor (DPU) ZCU102/ZCU104:** https://github.com/Xilinx/Vitis-In-Depth-Tutorial/tree/master/Machine_Learning/Design_Tutorials/07-yolov4-tutorial -* **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) -* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about -* **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite -* **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron +- **OpenCV** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) +- **Intel OpenVINO 2021.2:** supports YOLOv4 (NPU Myriad X / USB Neural Compute Stick / Arria FPGA): https://devmesh.intel.com/projects/openvino-yolov4-49c756 read this 
[manual](https://github.com/TNTWEN/OpenVINO-YOLOV4) (old [manual](https://software.intel.com/en-us/articles/OpenVINO-Using-TensorFlow#converting-a-darknet-yolo-model) ) (for [Scaled-YOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-large) models use https://github.com/Chen-MingChang/pytorch_YOLO_OpenVINO_demo ) +- **PyTorch > ONNX**: + - [WongKinYiu/PyTorch_YOLOv4](https://github.com/WongKinYiu/PyTorch_YOLOv4) + - [maudzung/3D-YOLOv4](https://github.com/maudzung/Complex-YOLOv4-Pytorch) + - [Tianxiaomo/pytorch-YOLOv4](https://github.com/Tianxiaomo/pytorch-YOLOv4) + - [YOLOv5](https://github.com/ultralytics/yolov5) +- **ONNX** on Jetson for YOLOv4: https://developer.nvidia.com/blog/announcing-onnx-runtime-for-jetson/ and https://github.com/ttanzhiqiang/onnx_tensorrt_project +- **nVidia Transfer Learning Toolkit (TLT>=3.0)** Training and Detection https://docs.nvidia.com/metropolis/TLT/tlt-user-guide/text/object_detection/yolo_v4.html +- **TensorRT+tkDNN**: https://github.com/ceccocats/tkDNN#fps-results +- **Deepstream 5.0 / TensorRT for YOLOv4** https://github.com/NVIDIA-AI-IOT/yolov4_deepstream or https://github.com/marcoslucianops/DeepStream-Yolo read [Yolo is natively supported in DeepStream 4.0](https://news.developer.nvidia.com/deepstream-sdk-4-now-available/) and [PDF](https://docs.nvidia.com/metropolis/deepstream/Custom_YOLO_Model_in_the_DeepStream_YOLO_App.pdf). 
Additionally [jkjung-avt/tensorrt_demos](https://github.com/jkjung-avt/tensorrt_demos) or [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx) +- **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt +- **DirectML** https://github.com/microsoft/DirectML/tree/master/Samples/yolov4 +- **OpenCL** (Intel, AMD, Mali GPUs for macOS & GNU/Linux) https://github.com/sowson/darknet +- **HIP** for Training and Detection on AMD GPU https://github.com/os-hackathon/darknet +- **ROS** (Robot Operating System) https://github.com/engcang/ros-yolo-sort +- **Xilinx Zynq Ultrascale+ Deep Learning Processor (DPU) ZCU102/ZCU104:** https://github.com/Xilinx/Vitis-In-Depth-Tutorial/tree/master/Machine_Learning/Design_Tutorials/07-yolov4-tutorial +- **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) +- **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about +- **Tencent/ncnn:** the fastest inference of YOLOv4 on mobile phone CPU: https://github.com/Tencent/ncnn +- **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite +- **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron #### Datasets -* MS COCO: use `./scripts/get_coco_dataset.sh` to get labeled MS COCO detection dataset -* OpenImages: use `python ./scripts/get_openimages_dataset.py` for labeling train detection dataset -* Pascal VOC: use `python ./scripts/voc_label.py` for labeling Train/Test/Val detection datasets -* 
ILSVRC2012 (ImageNet classification): use `./scripts/get_imagenet_train.sh` (also `imagenet_label.sh` for labeling valid set) -* German/Belgium/Russian/LISA/MASTIF Traffic Sign Datasets for Detection - use this parsers: https://github.com/angeligareta/Datasets2Darknet#detection-task -* List of other datasets: https://github.com/AlexeyAB/darknet/tree/master/scripts#datasets +- MS COCO: use `./scripts/get_coco_dataset.sh` to get labeled MS COCO detection dataset +- OpenImages: use `python ./scripts/get_openimages_dataset.py` for labeling train detection dataset +- Pascal VOC: use `python ./scripts/voc_label.py` for labeling Train/Test/Val detection datasets +- ILSVRC2012 (ImageNet classification): use `./scripts/get_imagenet_train.sh` (also `imagenet_label.sh` for labeling valid set) +- German/Belgium/Russian/LISA/MASTIF Traffic Sign Datasets for Detection - use these parsers: https://github.com/angeligareta/Datasets2Darknet#detection-task +- List of other datasets: https://github.com/AlexeyAB/darknet/tree/master/scripts#datasets ### Improvements in this repository -* developed State-of-the-Art object detector YOLOv4 -* added State-of-Art models: CSP, PRN, EfficientNet -* added layers: [conv_lstm], [scale_channels] SE/ASFF/BiFPN, [local_avgpool], [sam], [Gaussian_yolo], [reorg3d] (fixed [reorg]), fixed [batchnorm] -* added the ability for training recurrent models (with layers conv-lstm`[conv_lstm]`/conv-rnn`[crnn]`) for accurate detection on video -* added data augmentation: `[net] mixup=1 cutmix=1 mosaic=1 blur=1`. 
Added activations: SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX -* added the ability for training with GPU-processing using CPU-RAM to increase the mini_batch_size and increase accuracy (instead of batch-norm sync) -* improved binary neural network performance **2x-4x times** for Detection on CPU and GPU if you trained your own weights by using this XNOR-net model (bit-1 inference) : https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov3-tiny_xnor.cfg -* improved neural network performance **~7%** by fusing 2 layers into 1: Convolutional + Batch-norm -* improved performance: Detection **2x times**, on GPU Volta/Turing (Tesla V100, GeForce RTX, ...) using Tensor Cores if `CUDNN_HALF` defined in the `Makefile` or `darknet.sln` -* improved performance **~1.2x** times on FullHD, **~2x** times on 4K, for detection on the video (file/stream) using `darknet detector demo`... -* improved performance **3.5 X times** of data augmentation for training (using OpenCV SSE/AVX functions instead of hand-written functions) - removes bottleneck for training on multi-GPU or GPU Volta -* improved performance of detection and training on Intel CPU with AVX (Yolo v3 **~85%**) -* optimized memory allocation during network resizing when `random=1` -* optimized GPU initialization for detection - we use batch=1 initially instead of re-init with batch=1 -* added correct calculation of **mAP, F1, IoU, Precision-Recall** using command `darknet detector map`... -* added drawing of chart of average-Loss and accuracy-mAP (`-map` flag) during training -* run `./darknet detector demo ... 
-json_port 8070 -mjpeg_port 8090` as JSON and MJPEG server to get results online over the network by using your soft or Web-browser -* added calculation of anchors for training -* added example of Detection and Tracking objects: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp -* run-time tips and warnings if you use incorrect cfg-file or dataset -* added support for Windows -* many other fixes of code... +- developed State-of-the-Art object detector YOLOv4 +- added State-of-Art models: CSP, PRN, EfficientNet +- added layers: [conv_lstm], [scale_channels] SE/ASFF/BiFPN, [local_avgpool], [sam], [Gaussian_yolo], [reorg3d] (fixed [reorg]), fixed [batchnorm] +- added the ability for training recurrent models (with layers conv-lstm`[conv_lstm]`/conv-rnn`[crnn]`) for accurate detection on video +- added data augmentation: `[net] mixup=1 cutmix=1 mosaic=1 blur=1`. Added activations: SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX +- added the ability for training with GPU-processing using CPU-RAM to increase the mini_batch_size and increase accuracy (instead of batch-norm sync) +- improved binary neural network performance **2x-4x times** for Detection on CPU and GPU if you trained your own weights by using this XNOR-net model (bit-1 inference) : https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov3-tiny_xnor.cfg +- improved neural network performance **~7%** by fusing 2 layers into 1: Convolutional + Batch-norm +- improved performance: Detection **2x times**, on GPU Volta/Turing (Tesla V100, GeForce RTX, ...) using Tensor Cores if `CUDNN_HALF` defined in the `Makefile` or `darknet.sln` +- improved performance **~1.2x** times on FullHD, **~2x** times on 4K, for detection on the video (file/stream) using `darknet detector demo`... 
+- improved performance **3.5 X times** of data augmentation for training (using OpenCV SSE/AVX functions instead of hand-written functions) - removes bottleneck for training on multi-GPU or GPU Volta +- improved performance of detection and training on Intel CPU with AVX (Yolo v3 **~85%**) +- optimized memory allocation during network resizing when `random=1` +- optimized GPU initialization for detection - we use batch=1 initially instead of re-init with batch=1 +- added correct calculation of **mAP, F1, IoU, Precision-Recall** using command `darknet detector map`... +- added drawing of chart of average-Loss and accuracy-mAP (`-map` flag) during training +- run `./darknet detector demo ... -json_port 8070 -mjpeg_port 8090` as JSON and MJPEG server to get results online over the network by using your soft or Web-browser +- added calculation of anchors for training +- added example of Detection and Tracking objects: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp +- run-time tips and warnings if you use incorrect cfg-file or dataset +- added support for Windows +- many other fixes of code... 
And added manual - [How to train Yolo v4-v2 (to detect your custom objects)](#how-to-train-to-detect-your-custom-objects) @@ -272,82 +285,78 @@ Also, you might be interested in using a simplified repository where is implemen On Linux use `./darknet` instead of `darknet.exe`, like this:`./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` -On Linux find executable file `./darknet` in the root directory, while on Windows find it in the directory `\build\darknet\x64` - -* Yolo v4 COCO - **image**: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25` -* **Output coordinates** of objects: `darknet.exe detector test cfg/coco.data yolov4.cfg yolov4.weights -ext_output dog.jpg` -* Yolo v4 COCO - **video**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output test.mp4` -* Yolo v4 COCO - **WebCam 0**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -c 0` -* Yolo v4 COCO for **net-videocam** - Smart WebCam: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg` -* Yolo v4 - **save result videofile res.avi**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -out_filename res.avi` -* Yolo v3 **Tiny** COCO - video: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4` -* **JSON and MJPEG server** that allows multiple connections from your soft or Web-browser `ip-address:8070` and 8090: `./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -json_port 8070 -mjpeg_port 8090 -ext_output` -* Yolo v3 Tiny **on GPU #1**: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights -i 1 test.mp4` -* Alternative method Yolo v3 COCO - image: `darknet.exe detect cfg/yolov4.cfg yolov4.weights -i 0 -thresh 0.25` -* Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: 
`http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): +On Linux find executable file `./darknet` in the root directory, while on Windows find it in the directory `\build\darknet\x64` + +- Yolo v4 COCO - **image**: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25` +- **Output coordinates** of objects: `darknet.exe detector test cfg/coco.data yolov4.cfg yolov4.weights -ext_output dog.jpg` +- Yolo v4 COCO - **video**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output test.mp4` +- Yolo v4 COCO - **WebCam 0**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights -c 0` +- Yolo v4 COCO for **net-videocam** - Smart WebCam: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg` +- Yolo v4 - **save result videofile res.avi**: `darknet.exe detector demo cfg/coco.data cfg/yolov4.cfg yolov4.weights test.mp4 -out_filename res.avi` +- Yolo v3 **Tiny** COCO - video: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights test.mp4` +- **JSON and MJPEG server** that allows multiple connections from your soft or Web-browser `ip-address:8070` and 8090: `./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -json_port 8070 -mjpeg_port 8090 -ext_output` +- Yolo v3 Tiny **on GPU #1**: `darknet.exe detector demo cfg/coco.data cfg/yolov3-tiny.cfg yolov3-tiny.weights -i 1 test.mp4` +- Alternative method Yolo v3 COCO - image: `darknet.exe detect cfg/yolov4.cfg yolov4.weights -i 0 -thresh 0.25` +- Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` -* 186 MB Yolo9000 - 
image: `darknet.exe detector test cfg/combine9k.data cfg/yolo9000.cfg yolo9000.weights` -* Remeber to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app -* To process a list of images `data/train.txt` and save results of detection to `result.json` file use: +- 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data cfg/yolo9000.cfg yolo9000.weights` +- Remember to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app +- To process a list of images `data/train.txt` and save results of detection to `result.json` file use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt` -* To process a list of images `data/train.txt` and save results of detection to `result.txt` use: +- To process a list of images `data/train.txt` and save results of detection to `result.txt` use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt` -* Pseudo-lableing - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: +- Pseudo-labelling - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt` -* To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` -* To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` -* To check accuracy mAP@IoU=75: `darknet.exe detector map data/obj.data yolo-obj.cfg 
backup\yolo-obj_7000.weights -iou_thresh 0.75` +- To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` +- To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` +- To check accuracy mAP@IoU=75: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights -iou_thresh 0.75` ##### For using network video-camera mjpeg-stream with any Android smartphone 1. Download for Android phone mjpeg-stream soft: IP Webcam / Smart WebCam - * Smart WebCam - preferably: https://play.google.com/store/apps/details?id=com.acontech.android.SmartWebCam2 - * IP Webcam: https://play.google.com/store/apps/details?id=com.pas.webcam + - Smart WebCam - preferably: https://play.google.com/store/apps/details?id=com.acontech.android.SmartWebCam2 + - IP Webcam: https://play.google.com/store/apps/details?id=com.pas.webcam 2. Connect your Android phone to computer by WiFi (through a WiFi-router) or USB 3. Start Smart WebCam on your phone 4. Replace the address below, on shown in the phone application (Smart WebCam) and launch: -* Yolo v4 COCO-model: `darknet.exe detector demo data/coco.data yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0` +- Yolo v4 COCO-model: `darknet.exe detector demo data/coco.data yolov4.cfg yolov4.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0` ### How to compile on Linux/macOS (using `CMake`) The `CMakeLists.txt` will attempt to find installed optional dependencies like CUDA, cudnn, ZED and build against those. It will also create a shared object library file to use `darknet` for code development. -Install powershell if you do not already have it ([guide here](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell)). 
- -To update CMake on Ubuntu, it's better to follow guide here: https://apt.kitware.com/ +To update CMake on Ubuntu, it's better to follow guide here: https://apt.kitware.com/ or https://cmake.org/download/ -### Using `vcpkg` - -Open a shell and type these commands - -```PowerShell -PS Code/> git clone https://github.com/microsoft/vcpkg -PS Code/> cd vcpkg -PS Code/vcpkg> ./bootstrap-vcpkg.sh -PS Code/vcpkg> $env:VCPKG_ROOT=$PWD -PS Code/vcpkg> cd .. -PS Code/> git clone https://github.com/AlexeyAB/darknet -PS Code/> cd darknet -PS Code/darknet> ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN +```bash +git clone https://github.com/AlexeyAB/darknet +cd darknet +mkdir build_release +cd build_release +cmake .. +cmake --build . --target install --parallel 8 ``` -(add option `-EnableOPENCV_CUDA` if you want to build OpenCV with CUDA support - very slow to build!) -If you open the `build.ps1` script at the beginning you will find all available switches. +### Using also PowerShell + +Install: `CMake`, `CUDA`, `cuDNN` [How to install dependencies](#requirements-for-windows-linux-and-macos) -### Using libraries manually provided +Install PowerShell for your OS (Linux or macOS) ([guide here](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell)). -Open a shell and type these commands +Open PowerShell and type these commands ```PowerShell -PS Code/> git clone https://github.com/AlexeyAB/darknet -PS Code/> cd darknet -PS Code/darknet> ./build.ps1 -EnableOPENCV -EnableCUDA -EnableCUDNN +git clone https://github.com/AlexeyAB/darknet +cd darknet +./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN ``` -(remove options like `-EnableCUDA` or `-EnableCUDNN` if you are not interested into). 
+- remove options like `-EnableCUDA` or `-EnableCUDNN` if you are not interested in them +- remove option `-UseVCPKG` if you plan to manually provide OpenCV library to darknet or if you do not want to enable OpenCV integration +- add option `-EnableOPENCV_CUDA` if you want to build OpenCV with CUDA support - very slow to build! (requires `-UseVCPKG`) + If you open the `build.ps1` script at the beginning you will find all available switches. ### How to compile on Linux (using `make`) @@ -355,59 +364,55 @@ If you open the `build.ps1` script at the beginning you will find all available Just do `make` in the darknet directory. (You can try to compile and run it on Google Colab in cloud [link](https://colab.research.google.com/drive/12QusaaRj_lUwCGDvQNfICpa7kA7_a2dE) (press «Open in Playground» button at the top-left corner) and watch the video [link](https://www.youtube.com/watch?v=mKAEGSxwOAY) ) Before make, you can set such options in the `Makefile`: [link](https://github.com/AlexeyAB/darknet/blob/9c1b9a2cf6363546c152251be578a21f3c3caec6/Makefile#L1) -* `GPU=1` to build with CUDA to accelerate by using GPU (CUDA should be in `/usr/local/cuda`) -* `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`) -* `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x -* `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams -* `DEBUG=1` to bould debug version of Yolo -* `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU -* `LIBSO=1` to build a library `darknet.so` and binary runable file `uselib` that uses this library. 
Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp +- `GPU=1` to build with CUDA to accelerate by using GPU (CUDA should be in `/usr/local/cuda`) +- `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`) +- `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x +- `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams +- `DEBUG=1` to build debug version of Yolo +- `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU +- `LIBSO=1` to build a library `darknet.so` and binary runnable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4` -* `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run +- `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera` -* You also need to specify for which graphics card the code is generated. This is done by setting `ARCH=`. If you use a never version than CUDA 11 you further need to edit line 20 from Makefile and remove `-gencode arch=compute_30,code=sm_30 \` as Kepler GPU support was dropped in CUDA 11. You can also drop the general `ARCH=` and just uncomment `ARCH=` for your graphics card. 
+- You also need to specify for which graphics card the code is generated. This is done by setting `ARCH=`. If you use a newer version than CUDA 11 you further need to edit line 20 from Makefile and remove `-gencode arch=compute_30,code=sm_30 \` as Kepler GPU support was dropped in CUDA 11. You can also drop the general `ARCH=` and just uncomment `ARCH=` for your graphics card. To run Darknet on Linux use examples from this article, just use `./darknet` instead of `darknet.exe`, i.e. use this command: `./darknet detector test ./cfg/coco.data ./cfg/yolov4.cfg ./yolov4.weights` ### How to compile on Windows (using `CMake`) -Requires: -* MSVS: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community -* CMake GUI: `Windows win64-x64 Installer`https://cmake.org/download/ -* Download Darknet zip-archive with the latest commit and uncompress it: [master.zip](https://github.com/AlexeyAB/darknet/archive/master.zip) +Requires: -In Windows: +- MSVC: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community +- CMake GUI: `Windows win64-x64 Installer` https://cmake.org/download/ +- Download Darknet zip-archive with the latest commit and uncompress it: [master.zip](https://github.com/AlexeyAB/darknet/archive/master.zip) -* Start (button) -> All programms -> CMake -> CMake (gui) -> +In Windows: -* [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64` -> Finish -> Generate -> Open Project -> +- Start (button) -> All programs -> CMake -> CMake (gui) -> -* in MS Visual Studio: Select: x64 and Release -> Build -> Build solution +- [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for 
generator: `x64` -> Finish -> Generate -> Open Project -> -* find the executable file `darknet.exe` in the output path to the binaries you specified +- in MS Visual Studio: Select: x64 and Release -> Build -> Build solution -![x64 and Release](https://habrastorage.org/webt/ay/ty/f-/aytyf-8bufe7q-16yoecommlwys.jpeg) +- find the executable file `darknet.exe` in the output path to the binaries you specified +![x64 and Release](https://habrastorage.org/webt/ay/ty/f-/aytyf-8bufe7q-16yoecommlwys.jpeg) ### How to compile on Windows (using `vcpkg`) This is the recommended approach to build Darknet on Windows. -1. Install Visual Studio 2017 or 2019. In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com) +1. Install Visual Studio 2017 or 2019. In case you need to download it, please go here: [Visual Studio Community](http://visualstudio.com). Remember to install English language pack, this is mandatory for vcpkg! -2. Install CUDA (at least v10.0) enabling VS Integration during installation. +2. Install CUDA enabling VS Integration during installation. 3. Open Powershell (Start -> All programs -> Windows Powershell) and type these commands: ```PowerShell -PS Code/> git clone https://github.com/microsoft/vcpkg -PS Code/> cd vcpkg -PS Code/vcpkg> .\bootstrap-vcpkg.bat -PS Code/vcpkg> $env:VCPKG_ROOT=$PWD -PS Code/vcpkg> cd .. -PS Code/> git clone https://github.com/AlexeyAB/darknet -PS Code/> cd darknet -PS Code/darknet> .\build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN +Set-ExecutionPolicy unrestricted -Scope CurrentUser -Force +git clone https://github.com/AlexeyAB/darknet +cd darknet +.\build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN ``` (add option `-EnableOPENCV_CUDA` if you want to build OpenCV with CUDA support - very slow to build! - or remove options like `-EnableCUDA` or `-EnableCUDNN` if you are not interested in them). 
If you open the `build.ps1` script at the beginning you will find all available switches. @@ -429,31 +434,29 @@ https://groups.google.com/d/msg/darknet/NbJqonJBTSY/Te5PfIpuCAAJ Training Yolo v4 (and v3): 0. For training `cfg/yolov4-custom.cfg` download the pre-trained weights-file (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) - 1. Create file `yolo-obj.cfg` with the same content as in `yolov4-custom.cfg` (or copy `yolov4-custom.cfg` to `yolo-obj.cfg)` and: -* change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3) -* change line subdivisions to [`subdivisions=16`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) -* change line max_batches to (`classes*2000`, but not less than number of training images and not less than `6000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes -* change line steps to 80% and 90% of max_batches, f.e. 
[`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22) -* set network size `width=416 height=416` or any value multiple of 32: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9 -* change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers: - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783 -* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers. - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689 - * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776 -* when using [`[Gaussian_yolo]`](https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L608) layers, change [`filters=57`] filters=(classes + 9)x3 in the 3 `[convolutional]` before each `[Gaussian_yolo]` layer - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L604 - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L696 - * https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789 +- change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3) +- change 
line subdivisions to [`subdivisions=16`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) +- change line max_batches to (`classes*2000`, but not less than number of training images and not less than `6000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes +- change line steps to 80% and 90% of max_batches, f.e. [`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22) +- set network size `width=416 height=416` or any value multiple of 32: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9 +- change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers: + - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610 + - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696 + - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783 +- change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers. 
+ - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603 + - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689 + - https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776 +- when using [`[Gaussian_yolo]`](https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L608) layers, change [`filters=57`] to filters=(classes + 9)x3 in the 3 `[convolutional]` before each `[Gaussian_yolo]` layer + - https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L604 + - https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L696 + - https://github.com/AlexeyAB/darknet/blob/6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc/cfg/Gaussian_yolov3_BDD.cfg#L789 So if `classes=1` then should be `filters=18`. If `classes=2` then write `filters=21`. - **(Do not write in the cfg-file: filters=(classes + 5)x3)** - + (Generally `filters` depends on the `classes`, `coords` and number of `mask`s, i.e. filters=`(classes + coords + 1)*<number of mask>`, where `mask` is indices of anchors. If `mask` is absent, then filters=`(classes + coords + 1)*num`) So for example, for 2 objects, your file `yolo-obj.cfg` should differ from `yolov4-custom.cfg` in such lines in each of **3** [yolo]-layers: @@ -467,7 +470,6 @@ classes=2 ``` 2. Create file `obj.names` in the directory `build\darknet\x64\data\`, with objects names - each in new line - 3. Create file `obj.data` in the directory `build\darknet\x64\data\`, containing (where **classes = number of objects**): ```ini @@ -479,22 +481,22 @@ classes=2 ``` 4. Put image-files (.jpg) of your objects in the directory `build\darknet\x64\data\obj\` - 5. You should label each object on images from your dataset. 
Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark -It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: +It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: `<object-class> <x_center> <y_center> <width> <height>` - Where: - * `<object-class>` - integer object number from `0` to `(classes-1)` - * `<x_center> <y_center> <width> <height>` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` - * for example: `<x> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>` - * atention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner) + Where: + +- `<object-class>` - integer object number from `0` to `(classes-1)` +- `<x_center> <y_center> <width> <height>` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` +- for example: `<x> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>` +- attention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner) For example, for `img1.jpg` the file `img1.txt` will be created, containing: - ``` + ```csv 1 0.716797 0.395833 0.216406 0.147222 0 0.687109 0.379167 0.255469 0.158333 1 0.420312 0.395833 0.140625 0.166667 @@ -502,60 +504,60 @@ It will create `.txt`-file for each `.jpg`-image-file - in the same directory an 6. Create file `train.txt` in directory `build\darknet\x64\data\`, with filenames of your images, each filename in new line, with path relative to `darknet.exe`, for example containing: - ``` + ```csv data/obj/img1.jpg data/obj/img2.jpg data/obj/img3.jpg ``` 7. 
Download pre-trained weights for the convolutional layers and put to the directory `build\darknet\x64` - * for `yolov4.cfg`, `yolov4-custom.cfg` (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) - * for `yolov4-tiny.cfg`, `yolov4-tiny-3l.cfg`, `yolov4-tiny-custom.cfg` (19 MB): [yolov4-tiny.conv.29](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29) - * for `csresnext50-panet-spp.cfg` (133 MB): [csresnext50-panet-spp.conv.112](https://drive.google.com/file/d/16yMYCLQTY_oDlCIZPfn_sab6KD3zgzGq/view?usp=sharing) - * for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74) - * for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): [yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing) - * for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing) - + - for `yolov4.cfg`, `yolov4-custom.cfg` (162 MB): [yolov4.conv.137](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137) (Google drive mirror [yolov4.conv.137](https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp) ) + - for `yolov4-tiny.cfg`, `yolov4-tiny-3l.cfg`, `yolov4-tiny-custom.cfg` (19 MB): [yolov4-tiny.conv.29](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29) + - for `csresnext50-panet-spp.cfg` (133 MB): [csresnext50-panet-spp.conv.112](https://drive.google.com/file/d/16yMYCLQTY_oDlCIZPfn_sab6KD3zgzGq/view?usp=sharing) + - for `yolov3.cfg, yolov3-spp.cfg` (154 MB): [darknet53.conv.74](https://pjreddie.com/media/files/darknet53.conv.74) + - for `yolov3-tiny-prn.cfg , yolov3-tiny.cfg` (6 MB): 
[yolov3-tiny.conv.11](https://drive.google.com/file/d/18v36esoXCh-PsOKwyP2GWrpYDptDY8Zf/view?usp=sharing) + - for `enet-coco.cfg (EfficientNetB0-Yolov3)` (14 MB): [enetb0-coco.conv.132](https://drive.google.com/file/d/1uhh3D6RSn0ekgmsaTcl-ZW53WBaUDo6j/view?usp=sharing) 8. Start training by using the command line: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137` - + To train on Linux use command: `./darknet detector train data/obj.data yolo-obj.cfg yolov4.conv.137` (just use `./darknet` instead of `darknet.exe`) - - * (file `yolo-obj_last.weights` will be saved to the `build\darknet\x64\backup\` for each 100 iterations) - * (file `yolo-obj_xxxx.weights` will be saved to the `build\darknet\x64\backup\` for each 1000 iterations) - * (to disable Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on computer without monitor like a cloud Amazon EC2) - * (to see the mAP & Loss-chart during training on remote server without GUI, use command `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` then open URL `http://ip-address:8090` in Chrome/Firefox browser) + + - (file `yolo-obj_last.weights` will be saved to the `build\darknet\x64\backup\` for each 100 iterations) + - (file `yolo-obj_xxxx.weights` will be saved to the `build\darknet\x64\backup\` for each 1000 iterations) + - (to disable Loss-Window use `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show`, if you train on computer without monitor like a cloud Amazon EC2) + - (to see the mAP & Loss-chart during training on remote server without GUI, use command `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` then open URL `http://ip-address:8090` in Chrome/Firefox browser) 8.1. 
For training with mAP (mean average precisions) calculation for each 4 Epochs (set `valid=valid.txt` or `train.txt` in `obj.data` file) and run: `darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` 9. After training is complete - get result `yolo-obj_final.weights` from path `build\darknet\x64\backup\` - * After each 100 iterations you can stop and later start training from this point. For example, after 2000 iterations you can stop training, and later just start training using: `darknet.exe detector train data/obj.data yolo-obj.cfg backup\yolo-obj_2000.weights` + - After each 100 iterations you can stop and later start training from this point. For example, after 2000 iterations you can stop training, and later just start training using: `darknet.exe detector train data/obj.data yolo-obj.cfg backup\yolo-obj_2000.weights` (in the original repository https://github.com/pjreddie/darknet the weights-file is saved only once every 10 000 iterations `if(iterations > 1000)`) - * Also you can get result earlier than all 45000 iterations. - + - Also you can get result earlier than all 45000 iterations. + **Note:** If during training you see `nan` values for `avg` (loss) field - then training goes wrong, but if `nan` is in some other lines - then training goes well. - + **Note:** If you changed width= or height= in your cfg-file, then new width and height must be divisible by 32. - + **Note:** After training use such command for detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` - + **Note:** if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) - -### How to train tiny-yolo (to detect your custom objects): + +### How to train tiny-yolo (to detect your custom objects) Do all the same steps as for the full yolo model as described above. 
With the exception of: -* Download file with the first 29-convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29 + +- Download file with the first 29-convolutional layers of yolov4-tiny: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29 (Or get this file from yolov4-tiny.weights file by using command: `darknet.exe partial cfg/yolov4-tiny-custom.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29`) -* Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg` -* Start training: `darknet.exe detector train data/obj.data yolov4-tiny-obj.cfg yolov4-tiny.conv.29` +- Make your custom model `yolov4-tiny-obj.cfg` based on `cfg/yolov4-tiny-custom.cfg` instead of `yolov4.cfg` +- Start training: `darknet.exe detector train data/obj.data yolov4-tiny-obj.cfg yolov4-tiny.conv.29` For training Yolo based on other models ([DenseNet201-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/densenet201_yolo.cfg) or [ResNet50-Yolo](https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/resnet50_yolo.cfg)), you can download and get pre-trained weights as shown in this file: https://github.com/AlexeyAB/darknet/blob/master/build/darknet/x64/partial.cmd If you made your custom model that isn't based on other models, then you can train it without pre-trained weights, in which case random initial weights will be used. - -## When should I stop training: + +## When should I stop training Usually 2000 iterations for each class(object) are sufficient, but not less than number of training images and not less than 6000 iterations in total. 
But for a more precise definition when you should stop training, use the following manual: @@ -567,18 +569,18 @@ Usually sufficient 2000 iterations for each class(object), but not less than num > **9002**: 0.211667, **0.60730 avg**, 0.001000 rate, 3.868000 seconds, 576128 images > Loaded: 0.000000 seconds - * **9002** - iteration number (number of batch) - * **0.60730 avg** - average loss (error) - **the lower, the better** +- **9002** - iteration number (number of batch) +- **0.60730 avg** - average loss (error) - **the lower, the better** - When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final avgerage loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). + When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final average loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). - Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. + Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. 2. Once training is stopped, you should take some of last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them: -For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to overfitting. **Overfitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. 
You should get weights from **Early Stopping Point**: +For example, you stopped training after 9000 iterations, but one of the previous weights (7000, 8000, 9000) can give the best result. It can happen due to over-fitting. **Over-fitting** - is the case when you can detect objects on images from training-dataset, but can't detect objects on any other images. You should get weights from **Early Stopping Point**: -![Overfitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) +![Over-fitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) To get weights from Early Stopping Point: @@ -588,19 +590,19 @@ To get weights from Early Stopping Point: (If you use another GitHub repository, then use `darknet.exe detector recall`... instead of `darknet.exe detector map`...) -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights` -* `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights` +- `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` +- `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights` +- `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights` -And comapre last output lines for each weights (7000, 8000, 9000): +And compare last output lines for each weights (7000, 8000, 9000): Choose weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union) For example, **bigger mAP** gives weights `yolo-obj_8000.weights` - then **use these weights for detection**. -Or just train with `-map` flag: +Or just train with `-map` flag: -`darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` +`darknet.exe detector train data/obj.data yolo-obj.cfg yolov4.conv.137 -map` So you will see mAP-chart (red-line) in the Loss-chart Window. 
mAP will be calculated for each 4 Epochs using `valid=valid.txt` file that is specified in `obj.data` file (`1 Epoch = images_in_train_txt / batch` iterations) @@ -610,83 +612,82 @@ So you will see mAP-chart (red-line) in the Loss-chart Window. mAP will be calcu Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` -* **IoU** (intersect over union) - average instersect over union of objects and detections for a certain threshold = 0.24 +- **IoU** (intersect over union) - average intersect over union of objects and detections for a certain threshold = 0.24 -* **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf +- **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf **mAP** is default metric of precision in the PascalVOC competition, **this is the same as AP50** metric in the MS COCO competition. In terms of Wiki, indicators Precision and Recall have a slightly different meaning than in the PascalVOC competition, but **IoU always has the same meaning**. 
![precision_recall_iou](https://hsto.org/files/ca8/866/d76/ca8866d76fb840228940dbf442a7f06a.jpg) - -### Custom object detection: +### Custom object detection Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` | ![Yolo_v2_training](https://hsto.org/files/d12/1e7/515/d121e7515f6a4eb694913f10de5f2b61.jpg) | ![Yolo_v2_training](https://hsto.org/files/727/c7e/5e9/727c7e5e99bf4d4aa34027bb6a5e4bab.jpg) | |---|---| -## How to improve object detection: +## How to improve object detection 1. Before training: -* set flag `random=1` in your `.cfg`-file - it will increase precision by training Yolo for different resolutions: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L788) +- set flag `random=1` in your `.cfg`-file - it will increase precision by training Yolo for different resolutions: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L788) -* increase network resolution in your `.cfg`-file (`height=608`, `width=608` or any value multiple of 32) - it will increase precision +- increase network resolution in your `.cfg`-file (`height=608`, `width=608` or any value multiple of 32) - it will increase precision -* check that each object that you want to detect is mandatory labeled in your dataset - no one object in your data set should not be without label. In the most training issues - there are wrong labels in your dataset (got labels by using some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark +- check that each object that you want to detect is mandatory labeled in your dataset - no one object in your data set should not be without label. In the most training issues - there are wrong labels in your dataset (got labels by using some conversion script, marked with a third-party tool, ...). 
Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark -* my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong. +- my Loss is very high and mAP is very low, is training wrong? Run training with `-show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong. -* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more +- for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. 
So desirable that your training dataset include images with objects at different: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more -* desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects +- desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects -* What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark as you like - how would you like it to be detected. +- What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark as you like - how would you like it to be detected. 
-* for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0,0615234375*(width*height)` where are width and height are parameters from `[net]` section in cfg-file) +- for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0.0615234375*(width*height)` where width and height are parameters from `[net]` section in cfg-file) -* for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = 23` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L895 - * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L892 - * set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L989 +- for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = 23` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L895 + - set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L892 + - set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6f718c257815a984253346bba8fb7aa756c55090/cfg/yolov4.cfg#L989 -* for training for both small and large objects use modified models: - * Full-model: 5 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3_5l.cfg - * Tiny-model: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny_3l.cfg - * YOLOv4: 3 yolo layers: 
https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-custom.cfg +- for training for both small and large objects use modified models: + - Full-model: 5 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3_5l.cfg + - Tiny-model: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny_3l.cfg + - YOLOv4: 3 yolo layers: https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-custom.cfg -* If you train the model to distinguish Left and Right objects as separate classes (left/right hand, left/right-turn on road signs, ...) then for disabling flip data augmentation - add `flip=0` here: https://github.com/AlexeyAB/darknet/blob/3d2d0a7c98dbc8923d9ff705b81ff4f7940ea6ff/cfg/yolov3.cfg#L17 +- If you train the model to distinguish Left and Right objects as separate classes (left/right hand, left/right-turn on road signs, ...) then for disabling flip data augmentation - add `flip=0` here: https://github.com/AlexeyAB/darknet/blob/3d2d0a7c98dbc8923d9ff705b81ff4f7940ea6ff/cfg/yolov3.cfg#L17 -* General rule - your training dataset should include such a set of relative sizes of objects that you want to detect: - * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width` - * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height` +- General rule - your training dataset should include such a set of relative sizes of objects that you want to detect: + - `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width` + - `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height` I.e. 
for each object from Test dataset there must be at least 1 object in the Training dataset with the same class_id and about the same relative size: - `object width in percent from Training dataset` ~= `object width in percent from Test dataset` + `object width in percent from Training dataset` ~= `object width in percent from Test dataset` That is, if only objects that occupied 80-90% of the image were present in the training set, then the trained network will not be able to detect objects that occupy 1-10% of the image. -* to speedup training (with decreasing detection accuracy) set param `stopbackward=1` for layer-136 in cfg-file +- to speedup training (with decreasing detection accuracy) set param `stopbackward=1` for layer-136 in cfg-file -* each: `model of object, side, illimination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. +- each: `model of object, side, illumination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. -* to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5. +- to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5. 
-* Only if you are an **expert** in neural detection networks - recalculate anchors for your dataset for `width` and `height` from cfg-file: +- Only if you are an **expert** in neural detection networks - recalculate anchors for your dataset for `width` and `height` from cfg-file: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file. But you should change indexes of anchors `masks=` for each [yolo]-layer, so for YOLOv4 the 1st-[yolo]-layer has anchors smaller than 30x30, 2nd smaller than 60x60, 3rd remaining, and vice versa for YOLOv3. Also you should change the `filters=(classes + 5)*` before each [yolo]-layer. If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors. 2. After training - for detection: -* Increase network-resolution by set in your `.cfg`-file (`height=608` and `width=608`) or (`height=832` and `width=832`) or (any value multiple of 32) - this increases the precision and makes it possible to detect small objects: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9) +- Increase network-resolution by set in your `.cfg`-file (`height=608` and `width=608`) or (`height=832` and `width=832`) or (any value multiple of 32) - this increases the precision and makes it possible to detect small objects: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L8-L9) -* it is not necessary to train the network again, just use `.weights`-file already trained for 416x416 resolution +- it is not necessary to train the network again, just use `.weights`-file already trained for 416x416 resolution -* to get even greater accuracy you should train with higher resolution 608x608 or 832x832, note: if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 
64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) +- to get even greater accuracy you should train with higher resolution 608x608 or 832x832, note: if error `Out of memory` occurs then in `.cfg`-file you should increase `subdivisions=16`, 32 or 64: [link](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4) -## How to mark bounded boxes of objects and create annotation files: +## How to mark bounded boxes of objects and create annotation files Here you can find repository with GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 - v4: https://github.com/AlexeyAB/Yolo_mark @@ -706,40 +707,40 @@ Different tools for marking objects in images: ## How to use Yolo as DLL and SO libraries -* on Linux - * using `build.sh` or - * build `darknet` using `cmake` or - * set `LIBSO=1` in the `Makefile` and do `make` -* on Windows - * using `build.ps1` or - * build `darknet` using `cmake` or - * compile `build\darknet\yolo_cpp_dll.sln` solution or `build\darknet\yolo_cpp_dll_no_gpu.sln` solution +- on Linux + - using `build.sh` or + - build `darknet` using `cmake` or + - set `LIBSO=1` in the `Makefile` and do `make` +- on Windows + - using `build.ps1` or + - build `darknet` using `cmake` or + - compile `build\darknet\yolo_cpp_dll.sln` solution or `build\darknet\yolo_cpp_dll_no_gpu.sln` solution There are 2 APIs: -* C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h - * Python examples using the C API: - * https://github.com/AlexeyAB/darknet/blob/master/darknet.py - * https://github.com/AlexeyAB/darknet/blob/master/darknet_video.py +- C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h + - Python examples using the C API: + - https://github.com/AlexeyAB/darknet/blob/master/darknet.py + - https://github.com/AlexeyAB/darknet/blob/master/darknet_video.py -* C++ API: 
https://github.com/AlexeyAB/darknet/blob/master/include/yolo_v2_class.hpp - * C++ example that uses C++ API: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp +- C++ API: https://github.com/AlexeyAB/darknet/blob/master/include/yolo_v2_class.hpp + - C++ example that uses C++ API: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp ---- 1. To compile Yolo as C++ DLL-file `yolo_cpp_dll.dll` - open the solution `build\darknet\yolo_cpp_dll.sln`, set **x64** and **Release**, and do the: Build -> Build yolo_cpp_dll - * You should have installed **CUDA 10.0** - * To use cuDNN do: (right click on project) -> properties -> C/C++ -> Preprocessor -> Preprocessor Definitions, and add at the beginning of line: `CUDNN;` + - You should have installed **CUDA 10.2** + - To use cuDNN do: (right click on project) -> properties -> C/C++ -> Preprocessor -> Preprocessor Definitions, and add at the beginning of line: `CUDNN;` 2. To use Yolo as DLL-file in your C++ console application - open the solution `build\darknet\yolo_console_dll.sln`, set **x64** and **Release**, and do the: Build -> Build yolo_console_dll - * you can run your console application from Windows Explorer `build\darknet\x64\yolo_console_dll.exe` + - you can run your console application from Windows Explorer `build\darknet\x64\yolo_console_dll.exe` **use this command**: `yolo_console_dll.exe data/coco.names yolov4.cfg yolov4.weights test.mp4` - * after launching your console application and entering the image file name - you will see info for each object: + - after launching your console application and entering the image file name - you will see info for each object: ` ` - * to use simple OpenCV-GUI you should uncomment line `//#define OPENCV` in `yolo_console_dll.cpp`-file: [link](https://github.com/AlexeyAB/darknet/blob/a6cbaeecde40f91ddc3ea09aa26a03ab5bbf8ba8/src/yolo_console_dll.cpp#L5) - * you can see source code of simple example for detection on the video file: 
[link](https://github.com/AlexeyAB/darknet/blob/ab1c5f9e57b4175f29a6ef39e7e68987d3e98704/src/yolo_console_dll.cpp#L75) + - to use simple OpenCV-GUI you should uncomment line `//#define OPENCV` in `yolo_console_dll.cpp`-file: [link](https://github.com/AlexeyAB/darknet/blob/a6cbaeecde40f91ddc3ea09aa26a03ab5bbf8ba8/src/yolo_console_dll.cpp#L5) + - you can see source code of simple example for detection on the video file: [link](https://github.com/AlexeyAB/darknet/blob/ab1c5f9e57b4175f29a6ef39e7e68987d3e98704/src/yolo_console_dll.cpp#L75) `yolo_cpp_dll.dll`-API: [link](https://github.com/AlexeyAB/darknet/blob/master/src/yolo_v2_class.hpp#L42) @@ -768,3 +769,27 @@ public: #endif }; ``` + +## Citation + +``` +@misc{bochkovskiy2020yolov4, + title={YOLOv4: Optimal Speed and Accuracy of Object Detection}, + author={Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, + year={2020}, + eprint={2004.10934}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +``` +@InProceedings{Wang_2021_CVPR, + author = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark}, + title = {{Scaled-YOLOv4}: Scaling Cross Stage Partial Network}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2021}, + pages = {13029-13038} +} +``` diff --git a/build.ps1 b/build.ps1 index 7a38d0d250d..92342afd8b5 100755 --- a/build.ps1 +++ b/build.ps1 @@ -1,42 +1,207 @@ #!/usr/bin/env pwsh + param ( + [switch]$DisableInteractive = $false, [switch]$EnableCUDA = $false, [switch]$EnableCUDNN = $false, [switch]$EnableOPENCV = $false, [switch]$EnableOPENCV_CUDA = $false, [switch]$UseVCPKG = $false, + [switch]$InstallDARKNETthroughVCPKG = $false, + [switch]$InstallDARKNETdependenciesThroughVCPKGManifest = $false, + [switch]$ForceVCPKGDarknetHEAD = $false, + [switch]$DoNotUpdateVCPKG = $false, + [switch]$DoNotUpdateDARKNET = $false, + [switch]$DoNotDeleteBuildFolder = $false, [switch]$DoNotSetupVS = $false, 
[switch]$DoNotUseNinja = $false, [switch]$ForceCPP = $false, [switch]$ForceStaticLib = $false, - [switch]$ForceGCC8 = $false + [switch]$ForceVCPKGCacheRemoval = $false, + [switch]$ForceSetupVS = $false, + [switch]$EnableCSharpWrapper = $false, + [switch]$DownloadWeights = $false, + [Int32]$ForceGCCVersion = 0, + [Int32]$ForceOpenCVVersion = 0, + [Int32]$NumberOfBuildWorkers = 8, + [string]$AdditionalBuildSetup = "" # "-DCMAKE_CUDA_ARCHITECTURES=30" ) -$number_of_build_workers = 8 -#$additional_build_setup = " -DCMAKE_CUDA_ARCHITECTURES=30" +$build_ps1_version = "0.9.6" + +$ErrorActionPreference = "SilentlyContinue" +Stop-Transcript | out-null +$ErrorActionPreference = "Continue" +Start-Transcript -Path $PSScriptRoot/build.log + +Function MyThrow ($Message) { + if ($DisableInteractive) { + Write-Host $Message -ForegroundColor Red + throw + } + else { + # Check if running in PowerShell ISE + if ($psISE) { + # "ReadKey" not supported in PowerShell ISE. + # Show MessageBox UI + $Shell = New-Object -ComObject "WScript.Shell" + $Shell.Popup($Message, 0, "OK", 0) + throw + } + + $Ignore = + 16, # Shift (left or right) + 17, # Ctrl (left or right) + 18, # Alt (left or right) + 20, # Caps lock + 91, # Windows key (left) + 92, # Windows key (right) + 93, # Menu key + 144, # Num lock + 145, # Scroll lock + 166, # Back + 167, # Forward + 168, # Refresh + 169, # Stop + 170, # Search + 171, # Favorites + 172, # Start/Home + 173, # Mute + 174, # Volume Down + 175, # Volume Up + 176, # Next Track + 177, # Previous Track + 178, # Stop Media + 179, # Play + 180, # Mail + 181, # Select Media + 182, # Application 1 + 183 # Application 2 + + Write-Host $Message -ForegroundColor Red + Write-Host -NoNewline "Press any key to continue..." 
+ while (($null -eq $KeyInfo.VirtualKeyCode) -or ($Ignore -contains $KeyInfo.VirtualKeyCode)) { + $KeyInfo = $Host.UI.RawUI.ReadKey("NoEcho, IncludeKeyDown") + } + Write-Host "" + throw + } +} + +Function DownloadNinja() { + Write-Host "Unable to find Ninja, downloading a portable version on-the-fly" -ForegroundColor Yellow + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue ninja + Remove-Item -Force -ErrorAction SilentlyContinue ninja.zip + if ($IsWindows -or $IsWindowsPowerShell) { + $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip" + } + elseif ($IsLinux) { + $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip" + } + elseif ($IsMacOS) { + $url = "https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-mac.zip" + } + else { + MyThrow("Unknown OS, unsupported") + } + Invoke-RestMethod -Uri $url -Method Get -ContentType application/zip -OutFile "ninja.zip" + Expand-Archive -Path ninja.zip + Remove-Item -Force -ErrorAction SilentlyContinue ninja.zip +} + + +Write-Host "Darknet build script version ${build_ps1_version}" + +if ((-Not $DisableInteractive) -and (-Not $UseVCPKG)) { + $Result = Read-Host "Enable vcpkg to install darknet dependencies (yes/no)" + if (($Result -eq 'Yes') -or ($Result -eq 'Y') -or ($Result -eq 'yes') -or ($Result -eq 'y')) { + $UseVCPKG = $true + } +} + +if ((-Not $DisableInteractive) -and (-Not $EnableCUDA) -and (-Not $IsMacOS)) { + $Result = Read-Host "Enable CUDA integration (yes/no)" + if (($Result -eq 'Yes') -or ($Result -eq 'Y') -or ($Result -eq 'yes') -or ($Result -eq 'y')) { + $EnableCUDA = $true + } +} + +if ($EnableCUDA -and (-Not $DisableInteractive) -and (-Not $EnableCUDNN)) { + $Result = Read-Host "Enable CUDNN optional dependency (yes/no)" + if (($Result -eq 'Yes') -or ($Result -eq 'Y') -or ($Result -eq 'yes') -or ($Result -eq 'y')) { + $EnableCUDNN = $true + } +} + +if ((-Not $DisableInteractive) -and (-Not $EnableOPENCV)) { + $Result = 
Read-Host "Enable OpenCV optional dependency (yes/no)" + if (($Result -eq 'Yes') -or ($Result -eq 'Y') -or ($Result -eq 'yes') -or ($Result -eq 'y')) { + $EnableOPENCV = $true + } +} + +Write-Host -NoNewLine "PowerShell version:" +$PSVersionTable.PSVersion -if (-Not $IsWindows) { +if ($PSVersionTable.PSVersion.Major -eq 5) { + $IsWindowsPowerShell = $true +} + +if ($PSVersionTable.PSVersion.Major -lt 5) { + MyThrow("Your PowerShell version is too old, please update it.") +} + + +if ($IsLinux -or $IsMacOS) { + $bootstrap_ext = ".sh" + $exe_ext = "" +} +elseif ($IsWindows -or $IsWindowsPowerShell) { + $bootstrap_ext = ".bat" + $exe_ext = ".exe" +} + +if ($InstallDARKNETdependenciesThroughVCPKGManifest -and -not $InstallDARKNETthroughVCPKG) { + Write-Host "You requested darknet dependencies to be installed by vcpkg in manifest mode but you didn't enable installation through vcpkg, doing that for you" + $InstallDARKNETthroughVCPKG = $true +} + +if ($InstallDARKNETthroughVCPKG -and -not $UseVCPKG) { + Write-Host "You requested darknet to be installed by vcpkg but you didn't enable vcpkg, doing that for you" + $UseVCPKG = $true +} + +if ($InstallDARKNETthroughVCPKG -and -not $EnableOPENCV) { + Write-Host "You requested darknet to be installed by vcpkg but you didn't enable OpenCV, doing that for you" + $EnableOPENCV = $true +} + +if ($UseVCPKG) { + Write-Host "vcpkg bootstrap script: bootstrap-vcpkg${bootstrap_ext}" +} + +if ((-Not $IsWindows) -and (-Not $IsWindowsPowerShell) -and (-Not $ForceSetupVS)) { $DoNotSetupVS = $true } if ($ForceStaticLib) { Write-Host "Forced CMake to produce a static library" - $additional_build_setup = " -DBUILD_SHARED_LIBS=OFF " + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DBUILD_SHARED_LIBS=OFF " } -if ($IsLinux -and $ForceGCC8) { - Write-Host "Manually setting CC and CXX variables to gcc-8 and g++-8" - $env:CC = "gcc-8" - $env:CXX = "g++-8" +if (($IsLinux -or $IsMacOS) -and ($ForceGCCVersion -gt 0)) { + Write-Host "Manually setting 
CC and CXX variables to gcc version $ForceGCCVersion" + $env:CC = "gcc-$ForceGCCVersion" + $env:CXX = "g++-$ForceGCCVersion" } -if ($IsWindows -and -Not $env:VCPKG_DEFAULT_TRIPLET) { +if (($IsWindows -or $IsWindowsPowerShell) -and (-Not $env:VCPKG_DEFAULT_TRIPLET)) { $env:VCPKG_DEFAULT_TRIPLET = "x64-windows" } if ($EnableCUDA) { - if($IsMacOS) { + if ($IsMacOS) { Write-Host "Cannot enable CUDA on macOS" -ForegroundColor Yellow $EnableCUDA = $false } @@ -64,24 +229,30 @@ else { Write-Host "OPENCV is disabled, please pass -EnableOPENCV to the script to enable" } -if ($EnableCUDA -and $EnableOPENCV -and -not $EnableOPENCV_CUDA) { +if ($EnableCUDA -and $EnableOPENCV -and (-Not $EnableOPENCV_CUDA)) { Write-Host "OPENCV with CUDA extension is not enabled, you can enable it passing -EnableOPENCV_CUDA" } -elseif ($EnableOPENCV -and $EnableOPENCV_CUDA -and -not $EnableCUDA) { +elseif ($EnableOPENCV -and $EnableOPENCV_CUDA -and (-Not $EnableCUDA)) { Write-Host "OPENCV with CUDA extension was requested, but CUDA is not enabled, you can enable it passing -EnableCUDA" $EnableOPENCV_CUDA = $false } -elseif ($EnableCUDA -and $EnableOPENCV_CUDA -and -not $EnableOPENCV) { +elseif ($EnableCUDA -and $EnableOPENCV_CUDA -and (-Not $EnableOPENCV)) { Write-Host "OPENCV with CUDA extension was requested, but OPENCV is not enabled, you can enable it passing -EnableOPENCV" $EnableOPENCV_CUDA = $false } -elseif ($EnableOPENCV_CUDA -and -not $EnableCUDA -and -not $EnableOPENCV) { +elseif ($EnableOPENCV_CUDA -and (-Not $EnableCUDA) -and (-Not $EnableOPENCV)) { Write-Host "OPENCV with CUDA extension was requested, but OPENCV and CUDA are not enabled, you can enable them passing -EnableOPENCV -EnableCUDA" $EnableOPENCV_CUDA = $false } if ($UseVCPKG) { Write-Host "VCPKG is enabled" + if ($DoNotUpdateVCPKG) { + Write-Host "VCPKG will not be updated to latest version if found" -ForegroundColor Yellow + } + else { + Write-Host "VCPKG will be updated to latest version if found" + } } else { 
Write-Host "VCPKG is disabled, please pass -UseVCPKG to the script to enable" @@ -94,6 +265,15 @@ else { Write-Host "VisualStudio integration is enabled, please pass -DoNotSetupVS to the script to disable" } +if ($EnableCSharpWrapper -and ($IsWindowsPowerShell -or $IsWindows)) { + Write-Host "Yolo C# wrapper integration is enabled. Will be built with Visual Studio generator. Disabling Ninja" + $DoNotUseNinja = $true +} +else { + $EnableCSharpWrapper = $false + Write-Host "Yolo C# wrapper integration is disabled, please pass -EnableCSharpWrapper to the script to enable. You must be on Windows!" +} + if ($DoNotUseNinja) { Write-Host "Ninja is disabled" } @@ -110,23 +290,72 @@ else { Push-Location $PSScriptRoot -$CMAKE_EXE = Get-Command cmake 2> $null | Select-Object -ExpandProperty Definition +$GIT_EXE = Get-Command "git" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition +if (-Not $GIT_EXE) { + MyThrow("Could not find git, please install it") +} +else { + Write-Host "Using git from ${GIT_EXE}" +} + +if (Test-Path "$PSScriptRoot/.git") { + Write-Host "Darknet has been cloned with git and supports self-updating mechanism" + if ($DoNotUpdateDARKNET) { + Write-Host "Darknet will not self-update sources" -ForegroundColor Yellow + } + else { + Write-Host "Darknet will self-update sources, please pass -DoNotUpdateDARKNET to the script to disable" + $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Updating darknet sources failed! 
Exited with error code $exitCode.") + } + } +} + +$CMAKE_EXE = Get-Command "cmake" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition if (-Not $CMAKE_EXE) { - throw "Could not find CMake, please install it" + MyThrow("Could not find CMake, please install it") } else { Write-Host "Using CMake from ${CMAKE_EXE}" + $proc = Start-Process -NoNewWindow -PassThru -FilePath ${CMAKE_EXE} -ArgumentList "--version" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("CMake version check failed! Exited with error code $exitCode.") + } } if (-Not $DoNotUseNinja) { - $NINJA_EXE = Get-Command ninja 2> $null | Select-Object -ExpandProperty Definition + $NINJA_EXE = Get-Command "ninja" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition if (-Not $NINJA_EXE) { - $DoNotUseNinja = $true - Write-Host "Could not find Ninja, using msbuild or make backends as a fallback" -ForegroundColor Yellow + DownloadNinja + $env:PATH += "$([IO.Path]::PathSeparator)${PSScriptRoot}/ninja" + $NINJA_EXE = Get-Command "ninja" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition + if (-Not $NINJA_EXE) { + $DoNotUseNinja = $true + Write-Host "Could not find Ninja, unable to download a portable ninja, using msbuild or make backends as a fallback" -ForegroundColor Yellow + } } - else { + if ($NINJA_EXE) { Write-Host "Using Ninja from ${NINJA_EXE}" - $generator = "Ninja" + Write-Host -NoNewLine "Ninja version " + $proc = Start-Process -NoNewWindow -PassThru -FilePath ${NINJA_EXE} -ArgumentList "--version" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + $DoNotUseNinja = $true + Write-Host "Unable to run Ninja previously found, using msbuild or make backends as a fallback" -ForegroundColor Yellow + } + else { + $generator = "Ninja" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DCMAKE_BUILD_TYPE=Release" + } } } @@ -137,7 +366,7 @@ function 
getProgramFiles32bit() { } if ($null -eq $out) { - throw "Could not find [Program Files 32-bit]" + MyThrow("Could not find [Program Files 32-bit]") } return $out @@ -161,11 +390,11 @@ function getLatestVisualStudioWithDesktopWorkloadPath() { } } if (!$installationPath) { - Throw "Could not locate any installation of Visual Studio" + MyThrow("Could not locate any installation of Visual Studio") } } else { - Throw "Could not locate vswhere at $vswhereExe" + MyThrow("Could not locate vswhere at $vswhereExe") } return $installationPath } @@ -189,46 +418,124 @@ function getLatestVisualStudioWithDesktopWorkloadVersion() { } } if (!$installationVersion) { - Throw "Could not locate any installation of Visual Studio" + MyThrow("Could not locate any installation of Visual Studio") } } else { - Throw "Could not locate vswhere at $vswhereExe" + MyThrow("Could not locate vswhere at $vswhereExe") } return $installationVersion } +$vcpkg_root_set_by_this_script = $false if ((Test-Path env:VCPKG_ROOT) -and $UseVCPKG) { $vcpkg_path = "$env:VCPKG_ROOT" Write-Host "Found vcpkg in VCPKG_ROOT: $vcpkg_path" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } elseif ((Test-Path "${env:WORKSPACE}/vcpkg") -and $UseVCPKG) { $vcpkg_path = "${env:WORKSPACE}/vcpkg" $env:VCPKG_ROOT = "${env:WORKSPACE}/vcpkg" + $vcpkg_root_set_by_this_script = $true Write-Host "Found vcpkg in WORKSPACE/vcpkg: $vcpkg_path" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" -} -elseif ((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { - $vcpkg_path = "${RUNVCPKG_VCPKG_ROOT_OUT}" - $env:VCPKG_ROOT = "${RUNVCPKG_VCPKG_ROOT_OUT}" - Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${RUNVCPKG_VCPKG_ROOT_OUT}" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + $AdditionalBuildSetup = 
$AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" +} +elseif (-not($null -eq ${RUNVCPKG_VCPKG_ROOT_OUT})) { + if ((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { + $vcpkg_path = "${RUNVCPKG_VCPKG_ROOT_OUT}" + $env:VCPKG_ROOT = "${RUNVCPKG_VCPKG_ROOT_OUT}" + $vcpkg_root_set_by_this_script = $true + Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${vcpkg_path}" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + } } -elseif ((Test-Path "$PWD/vcpkg") -and $UseVCPKG) { +elseif ($UseVCPKG) { + if (-Not (Test-Path "$PWD/vcpkg")) { + $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "clone https://github.com/microsoft/vcpkg" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-not ($exitCode -eq 0)) { + MyThrow("Cloning vcpkg sources failed! Exited with error code $exitCode.") + } + } $vcpkg_path = "$PWD/vcpkg" $env:VCPKG_ROOT = "$PWD/vcpkg" + $vcpkg_root_set_by_this_script = $true Write-Host "Found vcpkg in $PWD/vcpkg: $PWD/vcpkg" - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } else { Write-Host "Skipping vcpkg integration`n" -ForegroundColor Yellow - $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=OFF" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_VCPKG_INTEGRATION:BOOL=OFF" +} + +if ($UseVCPKG -and (Test-Path "$vcpkg_path/.git") -and (-Not $DoNotUpdateVCPKG)) { + Push-Location $vcpkg_path + $proc = Start-Process -NoNewWindow -PassThru -FilePath $GIT_EXE -ArgumentList "pull" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Updating vcpkg sources failed! 
Exited with error code $exitCode.") + } + $proc = Start-Process -NoNewWindow -PassThru -FilePath $PWD/bootstrap-vcpkg${bootstrap_ext} -ArgumentList "-disableMetrics" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Bootstrapping vcpkg failed! Exited with error code $exitCode.") + } + Pop-Location +} + +if ($UseVCPKG -and ($vcpkg_path.length -gt 40) -and ($IsWindows -or $IsWindowsPowerShell)) { + Write-Host "vcpkg path is very long and might fail. Please move it or" -ForegroundColor Yellow + Write-Host "the entire darknet folder to a shorter path, like C:\darknet" -ForegroundColor Yellow + Write-Host "You can use the subst command to ease the process if necessary" -ForegroundColor Yellow + if (-Not $DisableInteractive) { + $Result = Read-Host "Do you still want to continue? (yes/no)" + if (($Result -eq 'No') -or ($Result -eq 'N') -or ($Result -eq 'no') -or ($Result -eq 'n')) { + MyThrow("Build aborted") + } + } +} + +if ($ForceVCPKGCacheRemoval -and (-Not $UseVCPKG)) { + Write-Host "VCPKG is not enabled, so local vcpkg binary cache will not be deleted even if requested" -ForegroundColor Yellow +} + +if (($ForceOpenCVVersion -eq 2) -and $UseVCPKG) { + Write-Host "You requested OpenCV version 2, so vcpkg will install that version" -ForegroundColor Yellow + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_USE_OPENCV4=OFF -DVCPKG_USE_OPENCV2=ON" +} + +if (($ForceOpenCVVersion -eq 3) -and $UseVCPKG) { + Write-Host "You requested OpenCV version 3, so vcpkg will install that version" -ForegroundColor Yellow + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DVCPKG_USE_OPENCV4=OFF -DVCPKG_USE_OPENCV3=ON" +} + +if ($UseVCPKG -and $ForceVCPKGCacheRemoval) { + if ($IsWindows -or $IsWindowsPowerShell) { + $vcpkgbinarycachepath = "$env:LOCALAPPDATA/vcpkg/archive" + } + elseif ($IsLinux) { + $vcpkgbinarycachepath = "$env:HOME/.cache/vcpkg/archive" + } + elseif ($IsMacOS) { + $vcpkgbinarycachepath = 
"$env:HOME/.cache/vcpkg/archive" + } + else { + MyThrow("Unknown OS, unsupported") + } + Write-Host "Removing local vcpkg binary cache from $vcpkgbinarycachepath" -ForegroundColor Yellow + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $vcpkgbinarycachepath } if (-Not $DoNotSetupVS) { - if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) { + $CL_EXE = Get-Command "cl" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition + if ((-Not $CL_EXE) -or ($CL_EXE -match "HostX86\\x86") -or ($CL_EXE -match "HostX64\\x86")) { $vsfound = getLatestVisualStudioWithDesktopWorkloadPath Write-Host "Found VS in ${vsfound}" Push-Location "${vsfound}\Common7\Tools" @@ -239,7 +546,7 @@ if (-Not $DoNotSetupVS) { } } Pop-Location - Write-Host "Visual Studio Command Prompt variables set" -ForegroundColor Yellow + Write-Host "Visual Studio Command Prompt variables set" } $tokens = getLatestVisualStudioWithDesktopWorkloadVersion @@ -249,18 +556,18 @@ if (-Not $DoNotSetupVS) { $selectConfig = " --config Release " if ($tokens[0] -eq "14") { $generator = "Visual Studio 14 2015" - $additional_build_setup = $additional_build_setup + " -T `"host=x64`" -A `"x64`"" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A `"x64`"" } elseif ($tokens[0] -eq "15") { $generator = "Visual Studio 15 2017" - $additional_build_setup = $additional_build_setup + " -T `"host=x64`" -A `"x64`"" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A `"x64`"" } elseif ($tokens[0] -eq "16") { $generator = "Visual Studio 16 2019" - $additional_build_setup = $additional_build_setup + " -T `"host=x64`" -A `"x64`"" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -T `"host=x64`" -A `"x64`"" } else { - throw "Unknown Visual Studio version, unsupported configuration" + MyThrow("Unknown Visual Studio version, unsupported configuration") } } if (-Not $UseVCPKG) { @@ -269,14 +576,16 @@ if (-Not $DoNotSetupVS) { } if ($DoNotSetupVS -and 
$DoNotUseNinja) { $generator = "Unix Makefiles" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DCMAKE_BUILD_TYPE=Release" } -Write-Host "Setting up environment to use CMake generator: $generator" -ForegroundColor Yellow +Write-Host "Setting up environment to use CMake generator: $generator" if (-Not $IsMacOS -and $EnableCUDA) { - if ($null -eq (Get-Command "nvcc" -ErrorAction SilentlyContinue)) { + $NVCC_EXE = Get-Command "nvcc" -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Definition + if (-Not $NVCC_EXE) { if (Test-Path env:CUDA_PATH) { $env:PATH += ";${env:CUDA_PATH}/bin" - Write-Host "Found cuda in ${env:CUDA_PATH}" -ForegroundColor Yellow + Write-Host "Found cuda in ${env:CUDA_PATH}" } else { Write-Host "Unable to find CUDA, if necessary please install it or define a CUDA_PATH env variable pointing to the install folder" -ForegroundColor Yellow @@ -296,37 +605,142 @@ if (-Not $IsMacOS -and $EnableCUDA) { } if ($ForceCPP) { - $additional_build_setup = $additional_build_setup + " -DBUILD_AS_CPP:BOOL=ON" + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DBUILD_AS_CPP:BOOL=ON" } -if (-Not($EnableCUDA)) { - $additional_build_setup = $additional_build_setup + " -DENABLE_CUDA:BOOL=OFF" +if (-Not $EnableCUDA) { + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_CUDA:BOOL=OFF" } -if (-Not($EnableCUDNN)) { - $additional_build_setup = $additional_build_setup + " -DENABLE_CUDNN:BOOL=OFF" +if (-Not $EnableCUDNN) { + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_CUDNN:BOOL=OFF" } -if (-Not($EnableOPENCV)) { - $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV:BOOL=OFF" +if (-Not $EnableOPENCV) { + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_OPENCV:BOOL=OFF" } -if ($EnableOPENCV_CUDA) { - $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV_WITH_CUDA:BOOL=ON" +if (-Not $EnableOPENCV_CUDA) { + $AdditionalBuildSetup = $AdditionalBuildSetup + " 
-DVCPKG_BUILD_OPENCV_WITH_CUDA:BOOL=OFF" } -New-Item -Path ./build_release -ItemType directory -Force -Set-Location build_release -$cmake_args = "-G `"$generator`" ${additional_build_setup} -S .." -Write-Host "CMake args: $cmake_args" -Start-Process -NoNewWindow -Wait -FilePath $CMAKE_EXE -ArgumentList $cmake_args -Start-Process -NoNewWindow -Wait -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${number_of_build_workers} --target install" -Remove-Item DarknetConfig.cmake -Remove-Item DarknetConfigVersion.cmake -$dllfiles = Get-ChildItem ./${dllfolder}/*.dll -if ($dllfiles) { - Copy-Item $dllfiles .. +if ($EnableCSharpWrapper) { + $AdditionalBuildSetup = $AdditionalBuildSetup + " -DENABLE_CSHARP_WRAPPER:BOOL=ON" } -Set-Location .. -Copy-Item cmake/Modules/*.cmake share/darknet/ -Pop-Location + +if ($InstallDARKNETthroughVCPKG) { + if ($ForceVCPKGDarknetHEAD) { + $headMode = " --head " + } + $features = "opencv-base" + $feature_manifest_opencv = "--x-feature=opencv-base" + if ($EnableCUDA) { + $features = $features + ",cuda" + $feature_manifest_cuda = "--x-feature=cuda" + } + if ($EnableCUDNN) { + $features = $features + ",cudnn" + $feature_manifest_cudnn = "--x-feature=cudnn" + } + if (-not (Test-Path "${env:VCPKG_ROOT}/vcpkg${exe_ext}")) { + $proc = Start-Process -NoNewWindow -PassThru -FilePath ${env:VCPKG_ROOT}/bootstrap-vcpkg${bootstrap_ext} -ArgumentList "-disableMetrics" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Bootstrapping vcpkg failed! 
Exited with error code $exitCode.") + } + } + if ($InstallDARKNETdependenciesThroughVCPKGManifest) { + Write-Host "Running vcpkg in manifest mode to install darknet dependencies" + Write-Host "vcpkg install --x-no-default-features $feature_manifest_opencv $feature_manifest_cuda $feature_manifest_cudnn $headMode" + $proc = Start-Process -NoNewWindow -PassThru -FilePath "${env:VCPKG_ROOT}/vcpkg${exe_ext}" -ArgumentList " install --x-no-default-features $feature_manifest_opencv $feature_manifest_cuda $feature_manifest_cudnn $headMode " + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Installing darknet through vcpkg failed! Exited with error code $exitCode.") + } + } + else { + Write-Host "Running vcpkg to install darknet" + Write-Host "vcpkg install darknet[${features}] $headMode --recurse" + Push-Location ${env:VCPKG_ROOT} + if ($ForceVCPKGDarknetHEAD) { + $proc = Start-Process -NoNewWindow -PassThru -FilePath "${env:VCPKG_ROOT}/vcpkg${exe_ext}" -ArgumentList " --feature-flags=-manifests remove darknet --recurse " + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Removing darknet through vcpkg failed! Exited with error code $exitCode.") + } + } + $proc = Start-Process -NoNewWindow -PassThru -FilePath "${env:VCPKG_ROOT}/vcpkg${exe_ext}" -ArgumentList " --feature-flags=-manifests upgrade --no-dry-run " + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Upgrading vcpkg installed ports failed! 
Exited with error code $exitCode.") + } + $proc = Start-Process -NoNewWindow -PassThru -FilePath "${env:VCPKG_ROOT}/vcpkg${exe_ext}" -ArgumentList " --feature-flags=-manifests install darknet[${features}] $headMode --recurse " # "-manifest" disables the manifest feature, so that if vcpkg is a subfolder of darknet, the vcpkg.json inside darknet folder does not trigger errors due to automatic manifest mode + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Installing darknet dependencies through vcpkg failed! Exited with error code $exitCode.") + } + Pop-Location + } +} +else { + $build_folder = "./build_release" + if (-Not $DoNotDeleteBuildFolder) { + Write-Host "Removing folder $build_folder" -ForegroundColor Yellow + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $build_folder + } + New-Item -Path $build_folder -ItemType directory -Force | Out-Null + Set-Location $build_folder + $cmake_args = "-G `"$generator`" ${AdditionalBuildSetup} -S .." + Write-Host "Configuring CMake project" -ForegroundColor Green + Write-Host "CMake args: $cmake_args" + $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList $cmake_args + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Config failed! Exited with error code $exitCode.") + } + Write-Host "Building CMake project" -ForegroundColor Green + $proc = Start-Process -NoNewWindow -PassThru -FilePath $CMAKE_EXE -ArgumentList "--build . ${selectConfig} --parallel ${NumberOfBuildWorkers} --target install" + $handle = $proc.Handle + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if (-Not ($exitCode -eq 0)) { + MyThrow("Config failed! 
Exited with error code $exitCode.") + } + Remove-Item -Force -ErrorAction SilentlyContinue DarknetConfig.cmake + Remove-Item -Force -ErrorAction SilentlyContinue DarknetConfigVersion.cmake + $dllfiles = Get-ChildItem ./${dllfolder}/*.dll + if ($dllfiles) { + Copy-Item $dllfiles .. + } + Set-Location .. + Copy-Item cmake/Modules/*.cmake share/darknet/ + Pop-Location +} + +Write-Host "Build complete!" -ForegroundColor Green + +if ($DownloadWeights) { + Write-Host "Downloading weights..." -ForegroundColor Yellow + & $PSScriptRoot/scripts/download_weights.ps1 + Write-Host "Weights downloaded" -ForegroundColor Green +} + +if ($vcpkg_root_set_by_this_script) { + $env:VCPKG_ROOT = $null +} + +$ErrorActionPreference = "SilentlyContinue" +Stop-Transcript | out-null +$ErrorActionPreference = "Continue" diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index 5282de26d91..5c8a7c3e1b9 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -223,6 +223,7 @@ + @@ -286,6 +287,7 @@ + diff --git a/build/darknet/darknet_no_gpu.vcxproj b/build/darknet/darknet_no_gpu.vcxproj index 72d23af3d09..fadf7289694 100644 --- a/build/darknet/darknet_no_gpu.vcxproj +++ b/build/darknet/darknet_no_gpu.vcxproj @@ -227,6 +227,7 @@ + @@ -290,6 +291,7 @@ + diff --git a/build/darknet/x64/cfg/yolov4-p5-frozen.cfg b/build/darknet/x64/cfg/yolov4-p5-frozen.cfg new file mode 100644 index 00000000000..38bebe32a21 --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-p5-frozen.cfg @@ -0,0 +1,1838 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=896 +height=896 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 
+filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + 
+[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 
+stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 
+stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 173 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 189 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 
+size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 205 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 189 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 218 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 173 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 231 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=1 + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 205 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 218 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou 
+nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 231 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/build/darknet/x64/cfg/yolov4-p5.cfg b/build/darknet/x64/cfg/yolov4-p5.cfg new file mode 100644 index 00000000000..14bce30ebd2 --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-p5.cfg @@ -0,0 +1,1837 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=896 +height=896 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 
+size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 
+size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 
+size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 173 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 
+filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 189 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 205 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 189 + 
+[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 218 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 173 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 231 (previous+3+4+2k) +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 205 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 218 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 231 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 
800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/build/darknet/x64/cfg/yolov4-p6.cfg b/build/darknet/x64/cfg/yolov4-p6.cfg new file mode 100644 index 00000000000..8defa150b6f --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-p6.cfg @@ -0,0 +1,2298 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# 
P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 186 (previous+7+3k) +[convolutional] +batch_normalize=1 
+filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 201 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 158 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 217 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 233 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 249 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 233 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 262 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 217 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 275 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 201 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 
+stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 288 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 249 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 262 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 275 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic 
+#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-6 + +[route] +layers = 288 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 12,13,14,15 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg b/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg new file mode 100644 index 00000000000..1461d88838e --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg @@ -0,0 +1,1429 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#:104x104 54:52x52 85:26x26 
104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 
+filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 
+size=1 +stride=1 +pad=1 +activation=mish +stopbackward=800 + + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -7 + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -10 +activation=linear + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 17 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -19 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +#### BFM-1 + +[route] +layers = 17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=256 +activation=logistic + +[sam] +from=-2 + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 137 +#layers = -1, -20 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 
459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 126 +# layers = -1, -49 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 diff --git a/build/darknet/x64/darknet.py b/build/darknet/x64/darknet.py index bb0ee183688..23539e0b64d 100644 --- 
a/build/darknet/x64/darknet.py +++ b/build/darknet/x64/darknet.py @@ -25,6 +25,8 @@ import random import os +print("Run: darknet_images.py or:\n") +print("python.exe darknet_video.py --data_file cfg/coco.data --config_file cfg/yolov4.cfg --weights yolov4.weights --input test.mp4 \n") class BOX(Structure): _fields_ = [("x", c_float), diff --git a/build/darknet/x64/darknet_python.cmd b/build/darknet/x64/darknet_python.cmd index b2df11d8207..74556582e82 100644 --- a/build/darknet/x64/darknet_python.cmd +++ b/build/darknet/x64/darknet_python.cmd @@ -14,6 +14,6 @@ rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install sci rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install scipy rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install opencv-python -C:\Users\Alex\AppData\Local\Programs\Python\Python36\python.exe darknet.py +C:\Users\Alex\AppData\Local\Programs\Python\Python36\python.exe darknet_images.py pause \ No newline at end of file diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd index 26a701a9c40..6080cf68b89 100644 --- a/build/darknet/x64/partial.cmd +++ b/build/darknet/x64/partial.cmd @@ -2,13 +2,28 @@ rem Download weights for - DenseNet201, ResNet50 and ResNet152 by this link: htt rem Download Yolo/Tiny-yolo: https://pjreddie.com/darknet/yolo/ rem Download Yolo9000: http://pjreddie.com/media/files/yolo9000.weights +darknet.exe partial cfg/yolov4-csp.cfg yolov4-csp.weights yolov4-csp.conv.142 142 + +darknet.exe partial cfg/yolov4x-mish.cfg yolov4x-mish.weights yolov4x-mish.conv.166 166 + + + + +rem darknet.exe partial cfg/yolov4-p5.cfg yolov4-p5.weights yolov4-p5.conv.232 232 + +rem darknet.exe partial cfg/yolov4-p6.cfg yolov4-p6.weights yolov4-p6.conv.289 289 + + rem darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.13 13 -darknet.exe partial cfg/yolov4-tiny.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29 +rem darknet.exe partial 
cfg/yolov4-tiny.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29 + + +rem darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.137 137 +rem darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.105 105 -darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.137 137 pause diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index 4bd09206caa..35742aa2be4 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -52,7 +52,7 @@ - + @@ -155,7 +155,7 @@ 64 - compute_30,sm_30;compute_75,sm_75 + compute_35,sm_35;compute_75,sm_75 @@ -225,6 +225,7 @@ + @@ -290,6 +291,7 @@ + @@ -306,6 +308,6 @@ - + \ No newline at end of file diff --git a/cfg/yolov4-p5-frozen.cfg b/cfg/yolov4-p5-frozen.cfg new file mode 100644 index 00000000000..38bebe32a21 --- /dev/null +++ b/cfg/yolov4-p5-frozen.cfg @@ -0,0 +1,1838 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=896 +height=896 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 
-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 
+stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] 
+batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 173 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 189 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 205 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 189 + +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 218 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 173 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 231 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=mish +stopbackward=1 + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 205 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 218 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 231 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 
+scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/cfg/yolov4-p5.cfg b/cfg/yolov4-p5.cfg new file mode 100644 index 00000000000..14bce30ebd2 --- /dev/null +++ b/cfg/yolov4-p5.cfg @@ -0,0 +1,1837 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=896 +height=896 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 
+activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 
+activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 173 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 189 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 
+pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 205 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 189 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 
+pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 218 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 173 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 231 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 205 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 
+classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 218 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 231 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 48,102, 119,96, 97,189, 217,184, 171,384, 324,451, 616,618, 800,800 +classes=80 +num=12 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/cfg/yolov4-p6.cfg b/cfg/yolov4-p6.cfg new file mode 100644 index 00000000000..8defa150b6f --- /dev/null +++ b/cfg/yolov4-p6.cfg @@ -0,0 +1,2298 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training 
+batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#use_cuda_graph = 1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 
+stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 
+stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + 
+[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 186 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + 
+[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 201 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 158 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 217 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + 
+[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 233 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 249 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 233 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 262 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 217 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 275 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 201 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 288 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 249 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 
31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 262 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 275 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-6 + +[route] +layers = 288 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 
+filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=logistic +#activation=linear +# use linear for Pytorch-Scaled-YOLOv4, and logistic for Darknet + +[yolo] +mask = 12,13,14,15 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +# ============ End of Head ============ # \ No newline at end of file diff --git a/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg b/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg new file mode 100644 index 00000000000..1461d88838e --- /dev/null +++ b/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg @@ -0,0 +1,1429 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 
+pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 
+pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=800 + + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] 
+batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -7 + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -10 +activation=linear + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 
+activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 17 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -19 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +#### BFM-1 + +[route] +layers = 17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=256 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] 
+batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 137 +#layers = -1, -20 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 126 +# layers = -1, -49 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] 
+layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 diff --git a/darknet.py b/darknet.py index ad526f99d95..698f0469ad5 100644 --- a/darknet.py +++ b/darknet.py @@ -1,26 +1,13 @@ -#!python3 +#!/usr/bin/env python3 + """ Python 3 wrapper for identifying objects in images -Requires DLL compilation - -Both the GPU and no-GPU version should be compiled; the no-GPU version should be renamed "yolo_cpp_dll_nogpu.dll". 
- -On a GPU system, you can force CPU evaluation by any of: - -- Set global variable DARKNET_FORCE_CPU to True -- Set environment variable CUDA_VISIBLE_DEVICES to -1 -- Set environment variable "FORCE_CPU" to "true" -- Set environment variable "DARKNET_PATH" to path darknet lib .so (for Linux) - +Running the script requires opencv-python to be installed (`pip install opencv-python`) Directly viewing or returning bounding-boxed images requires scikit-image to be installed (`pip install scikit-image`) - -Original *nix 2.7: https://github.com/pjreddie/darknet/blob/0f110834f4e18b30d5f101bf8f1724c34b7b83db/python/darknet.py -Windows Python 2.7 version: https://github.com/AlexeyAB/darknet/blob/fc496d52bf22a0bb257300d3c79be9cd80e722cb/build/darknet/x64/darknet.py - -@author: Philip Kahn -@date: 20180503 +Use pip3 instead of pip on some systems to be sure to install modules for python3 """ + from ctypes import * import math import random @@ -178,51 +165,17 @@ def detect_image(network, class_names, image, thresh=.5, hier_thresh=.5, nms=.45 return sorted(predictions, key=lambda x: x[1]) -# lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) -# lib = CDLL("libdarknet.so", RTLD_GLOBAL) -hasGPU = True -if os.name == "nt": +if os.name == "posix": + cwd = os.path.dirname(__file__) + lib = CDLL(cwd + "/libdarknet.so", RTLD_GLOBAL) +elif os.name == "nt": cwd = os.path.dirname(__file__) os.environ['PATH'] = cwd + ';' + os.environ['PATH'] - winGPUdll = os.path.join(cwd, "yolo_cpp_dll.dll") - winNoGPUdll = os.path.join(cwd, "yolo_cpp_dll_nogpu.dll") - envKeys = list() - for k, v in os.environ.items(): - envKeys.append(k) - try: - try: - tmp = os.environ["FORCE_CPU"].lower() - if tmp in ["1", "true", "yes", "on"]: - raise ValueError("ForceCPU") - else: - print("Flag value {} not forcing CPU mode".format(tmp)) - except KeyError: - # We never set the flag - if 'CUDA_VISIBLE_DEVICES' in envKeys: - if int(os.environ['CUDA_VISIBLE_DEVICES']) < 0: - raise 
ValueError("ForceCPU") - try: - global DARKNET_FORCE_CPU - if DARKNET_FORCE_CPU: - raise ValueError("ForceCPU") - except NameError as cpu_error: - print(cpu_error) - if not os.path.exists(winGPUdll): - raise ValueError("NoDLL") - lib = CDLL(winGPUdll, RTLD_GLOBAL) - except (KeyError, ValueError): - hasGPU = False - if os.path.exists(winNoGPUdll): - lib = CDLL(winNoGPUdll, RTLD_GLOBAL) - print("Notice: CPU-only mode") - else: - # Try the other way, in case no_gpu was compile but not renamed - lib = CDLL(winGPUdll, RTLD_GLOBAL) - print("Environment variables indicated a CPU run, but we didn't find {}. Trying a GPU run anyway.".format(winNoGPUdll)) + lib = CDLL("darknet.dll", RTLD_GLOBAL) else: - lib = CDLL(os.path.join( - os.environ.get('DARKNET_PATH', './'), - "libdarknet.so"), RTLD_GLOBAL) + print("Unsupported OS") + exit + lib.network_width.argtypes = [c_void_p] lib.network_width.restype = c_int lib.network_height.argtypes = [c_void_p] @@ -235,10 +188,7 @@ def detect_image(network, class_names, image, thresh=.5, hier_thresh=.5, nms=.45 predict.argtypes = [c_void_p, POINTER(c_float)] predict.restype = POINTER(c_float) -if hasGPU: - set_gpu = lib.cuda_set_device - set_gpu.argtypes = [c_int] - +set_gpu = lib.cuda_set_device init_cpu = lib.init_cpu make_image = lib.make_image diff --git a/darknet_images.py b/darknet_images.py index 0f1fca9b7ae..17ac91731a7 100644 --- a/darknet_images.py +++ b/darknet_images.py @@ -162,7 +162,7 @@ def save_annotations(name, image, detections, class_names): """ Files saved with image_name.txt and relative coordinates """ - file_name = name.split(".")[:-1][0] + ".txt" + file_name = os.path.splitext(name)[0] + ".txt" with open(file_name, "w") as f: for label, confidence, bbox in detections: x, y, w, h = convert2relative(image, bbox) diff --git a/darknet_video.py b/darknet_video.py index cc20b266606..04895133f97 100644 --- a/darknet_video.py +++ b/darknet_video.py @@ -60,16 +60,61 @@ def set_saved_video(input_video, output_video, size): 
return video +def convert2relative(bbox): + """ + YOLO format use relative coordinates for annotation + """ + x, y, w, h = bbox + _height = darknet_height + _width = darknet_width + return x/_width, y/_height, w/_width, h/_height + + +def convert2original(image, bbox): + x, y, w, h = convert2relative(bbox) + + image_h, image_w, __ = image.shape + + orig_x = int(x * image_w) + orig_y = int(y * image_h) + orig_width = int(w * image_w) + orig_height = int(h * image_h) + + bbox_converted = (orig_x, orig_y, orig_width, orig_height) + + return bbox_converted + + +def convert4cropping(image, bbox): + x, y, w, h = convert2relative(bbox) + + image_h, image_w, __ = image.shape + + orig_left = int((x - w / 2.) * image_w) + orig_right = int((x + w / 2.) * image_w) + orig_top = int((y - h / 2.) * image_h) + orig_bottom = int((y + h / 2.) * image_h) + + if (orig_left < 0): orig_left = 0 + if (orig_right > image_w - 1): orig_right = image_w - 1 + if (orig_top < 0): orig_top = 0 + if (orig_bottom > image_h - 1): orig_bottom = image_h - 1 + + bbox_cropping = (orig_left, orig_top, orig_right, orig_bottom) + + return bbox_cropping + + def video_capture(frame_queue, darknet_image_queue): while cap.isOpened(): ret, frame = cap.read() if not ret: break frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - frame_resized = cv2.resize(frame_rgb, (width, height), + frame_resized = cv2.resize(frame_rgb, (darknet_width, darknet_height), interpolation=cv2.INTER_LINEAR) - frame_queue.put(frame_resized) - img_for_detect = darknet.make_image(width, height, 3) + frame_queue.put(frame) + img_for_detect = darknet.make_image(darknet_width, darknet_height, 3) darknet.copy_image_from_bytes(img_for_detect, frame_resized.tobytes()) darknet_image_queue.put(img_for_detect) cap.release() @@ -91,18 +136,22 @@ def inference(darknet_image_queue, detections_queue, fps_queue): def drawing(frame_queue, detections_queue, fps_queue): random.seed(3) # deterministic bbox colors - video = set_saved_video(cap, 
args.out_filename, (width, height)) + video = set_saved_video(cap, args.out_filename, (darknet_width, darknet_height)) while cap.isOpened(): - frame_resized = frame_queue.get() + frame = frame_queue.get() detections = detections_queue.get() fps = fps_queue.get() - if frame_resized is not None: - image = darknet.draw_boxes(detections, frame_resized, class_colors) + detections_adjusted = [] + if frame is not None: + for label, confidence, bbox in detections: + bbox_adjusted = convert2original(frame, bbox) + detections_adjusted.append((str(label), confidence, bbox_adjusted)) + image = darknet.draw_boxes(detections_adjusted, frame, class_colors) + if not args.dont_show: + cv2.imshow('Inference', image) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if args.out_filename is not None: video.write(image) - if not args.dont_show: - cv2.imshow('Inference', image) if cv2.waitKey(fps) == 27: break cap.release() @@ -124,8 +173,8 @@ def drawing(frame_queue, detections_queue, fps_queue): args.weights, batch_size=1 ) - width = darknet.network_width(network) - height = darknet.network_height(network) + darknet_width = darknet.network_width(network) + darknet_height = darknet.network_height(network) input_path = str2int(args.input) cap = cv2.VideoCapture(input_path) Thread(target=video_capture, args=(frame_queue, darknet_image_queue)).start() diff --git a/include/darknet.h b/include/darknet.h index 1a5fdf75586..6daaaf1ca65 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -192,7 +192,8 @@ typedef enum { L2NORM, EMPTY, BLANK, - CONTRASTIVE + CONTRASTIVE, + IMPLICIT } LAYER_TYPE; // layer.h @@ -1055,7 +1056,7 @@ LIB_API void optimize_picture(network *net, image orig, int max_layer, float sca // image.h LIB_API void make_image_red(image im); -LIB_API image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c); +LIB_API image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int 
h, int c, float alpha); LIB_API image resize_image(image im, int w, int h); LIB_API void quantize_image(image im); LIB_API void copy_image_from_bytes(image im, char *pdata); diff --git a/scripts/README.md b/scripts/README.md index a641e673d0e..91b17224361 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,8 +1,6 @@ +# Datasets - -### Datasets: - -59.26TB of research data: http://academictorrents.com/ +59.26TB of research data: http://academictorrents.com/ ImageNet Torrent (Stanford): http://academictorrents.com/browse.php?search=imagenet&page=0 @@ -54,7 +52,6 @@ Visual Question Answering: https://visualqa.org/download.html Large Movie Review Dataset: http://ai.stanford.edu/~amaas/data/sentiment/ - ---- Wikipedia's List of datasets: https://en.wikipedia.org/wiki/List_of_datasets_for_machine-learning_research @@ -71,4 +68,4 @@ Pedestrian DATASETs for Vision based Detection and Tracking: https://hemprasad.w TrackingNet: https://tracking-net.org/ -RGB, RGBD, Texture-mapped 3D mesh models: http://www.ycbbenchmarks.com/ \ No newline at end of file +RGB, RGBD, Texture-mapped 3D mesh models: http://www.ycbbenchmarks.com/ diff --git a/scripts/deploy-cuda.ps1 b/scripts/deploy-cuda.ps1 new file mode 100644 index 00000000000..83b64d29559 --- /dev/null +++ b/scripts/deploy-cuda.ps1 @@ -0,0 +1,27 @@ +#!/usr/bin/env pwsh + +$url = 'https://developer.download.nvidia.com/compute/cuda/11.3.0/network_installers/cuda_11.3.0_win10_network.exe' + +$CudaFeatures = 'nvcc_11.3 cuobjdump_11.3 nvprune_11.3 cupti_11.3 memcheck_11.3 nvdisasm_11.3 nvprof_11.3 ' + ` + 'visual_studio_integration_11.3 visual_profiler_11.3 visual_profiler_11.3 cublas_11.3 cublas_dev_11.3 ' + ` + 'cudart_11.3 cufft_11.3 cufft_dev_11.3 curand_11.3 curand_dev_11.3 cusolver_11.3 cusolver_dev_11.3 ' + ` + 'cusparse_11.3 cusparse_dev_11.3 npp_11.3 npp_dev_11.3 nvrtc_11.3 nvrtc_dev_11.3 nvml_dev_11.3 ' + ` + 'occupancy_calculator_11.3 ' + +try { + Write-Host 'Downloading CUDA...' 
+ Invoke-WebRequest -Uri $url -OutFile "cuda_11.3.0_win10_network.exe" + Write-Host 'Installing CUDA...' + $proc = Start-Process -PassThru -FilePath "./cuda_11.3.0_win10_network.exe" -ArgumentList @('-s ' + $CudaFeatures) + $proc.WaitForExit() + $exitCode = $proc.ExitCode + if ($exitCode -eq 0) { + Write-Host 'Installation successful!' + } + else { + Throw "Installation failed! Exited with $exitCode." + } +} +catch { + Throw "Failed to install CUDA! $($_.Exception.Message)" +} diff --git a/scripts/deploy-cuda.sh b/scripts/deploy-cuda.sh new file mode 100755 index 00000000000..65f173aabaf --- /dev/null +++ b/scripts/deploy-cuda.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +if [[ "$OSTYPE" == "darwin"* ]]; then + echo "Unable to deploy CUDA on macOS, please wait for a future script update" +else + if [[ $(cut -f2 <<< $(lsb_release -r)) == "18.04" ]]; then + sudo apt-get update + sudo apt-get install build-essential g++ + sudo apt-get install apt-transport-https ca-certificates gnupg software-properties-common wget + wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub + sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb + wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo apt-get update + sudo apt-get dist-upgrade -y + sudo apt-get install -y --no-install-recommends cuda-compiler-10-2 cuda-libraries-dev-10-2 cuda-driver-dev-10-2 cuda-cudart-dev-10-2 cuda-curand-dev-10-2 + sudo apt-get install -y --no-install-recommends libcudnn7-dev + sudo rm -rf /usr/local/cuda + sudo ln -s /usr/local/cuda-10.2 /usr/local/cuda + elif [[ $(cut -f2 <<< $(lsb_release -r)) == "20.04" ]]; then + sudo apt-get update + sudo apt-get 
install build-essential g++ + sudo apt-get install apt-transport-https ca-certificates gnupg software-properties-common wget + sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin + sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" + sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" + sudo apt-get update + sudo apt-get dist-upgrade -y + sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 + sudo apt-get install -y --no-install-recommends libcudnn8-dev + sudo rm -rf /usr/local/cuda + sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda + else + echo "Unable to deploy CUDA on this Linux version, please wait for a future script update" + fi +fi diff --git a/scripts/dice_label.sh b/scripts/dice_label.sh old mode 100644 new mode 100755 diff --git a/scripts/get_coco2017.sh b/scripts/get_coco2017.sh old mode 100644 new mode 100755 diff --git a/scripts/get_coco_dataset.sh b/scripts/get_coco_dataset.sh old mode 100644 new mode 100755 diff --git a/scripts/get_imagenet_train.sh b/scripts/get_imagenet_train.sh old mode 100644 new mode 100755 diff --git a/scripts/imagenet_label.sh b/scripts/imagenet_label.sh old mode 100644 new mode 100755 diff --git a/scripts/install_OpenCV4.sh b/scripts/install_OpenCV4.sh old mode 100644 new mode 100755 diff --git a/scripts/setup.ps1 b/scripts/setup.ps1 index b846bab1656..ca54dba9754 100755 --- a/scripts/setup.ps1 +++ b/scripts/setup.ps1 @@ -1,32 +1,39 @@ #!/usr/bin/env pwsh -$install_cuda = $false +param ( + [switch]$InstallCUDA = $false +) if ($null -eq (Get-Command "choco.exe" -ErrorAction SilentlyContinue)) { # Download and 
install Chocolatey - Set-ExecutionPolicy unrestricted -Scope CurrentUser Invoke-Expression ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')) - Write-Host "Please close and re-open powershell and then re-run setup.ps1 script" - Break + Throw "Please close and re-open powershell and then re-run setup.ps1 script" } Start-Process -FilePath "choco" -Verb runAs -ArgumentList " install -y cmake ninja powershell git vscode" Start-Process -FilePath "choco" -Verb runAs -ArgumentList " install -y visualstudio2019buildtools --package-parameters `"--add Microsoft.VisualStudio.Component.VC.CoreBuildTools --includeRecommended --includeOptional --passive --locale en-US --lang en-US`"" +Push-Location $PSScriptRoot -if ($install_cuda) { - Start-Process -FilePath "choco" -Verb runAs -ArgumentList " install -y cuda" - $features = "full" +if ($InstallCUDA) { + & $PSScriptRoot/deploy-cuda.ps1 + $env:CUDA_PATH="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + $env:CUDA_TOOLKIT_ROOT_DIR="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" + $env:CUDACXX="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3\\bin\\nvcc.exe" + $CUDAisAvailable = $true } else { if (-not $null -eq $env:CUDA_PATH) { - $features = "full" + $CUDAisAvailable = $true } else{ - $features = "opencv-base" + $CUDAisAvailable = $false } } -git.exe clone https://github.com/microsoft/vcpkg -Set-Location vcpkg -.\bootstrap-vcpkg.bat -disableMetrics -.\vcpkg.exe install darknet[${features}]:x64-windows +if ($CUDAisAvailable) { + & $PSScriptRoot/../build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET + #& $PSScriptRoot/../build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableOPENCV_CUDA -DisableInteractive -DoNotUpdateDARKNET +} +else { + & $PSScriptRoot/../build.ps1 -UseVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET +} diff --git a/scripts/setup.sh b/scripts/setup.sh index 12a769b8881..51d641bcea4 100755 --- 
a/scripts/setup.sh +++ b/scripts/setup.sh @@ -1,79 +1,102 @@ #!/usr/bin/env bash -## enable or disable installed components +install_tools=false +bypass_driver_installation=false -install_cuda=true +POSITIONAL=() +while [[ $# -gt 0 ]] +do +key="$1" -########################### +case $key in + -InstallCUDA|--InstallCUDA) + install_tools=true + shift + ;; + -BypassDRIVER|--BypassDRIVER) + bypass_driver_installation=true + shift + ;; + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; +esac +done +set -- "${POSITIONAL[@]}" # restore positional parameters +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +echo "This script is located in $script_dir" +cd $script_dir/.. temp_folder="./temp" mkdir -p $temp_folder cd $temp_folder -sudo apt-get install cmake git ninja-build build-essential g++ - -if [ "$install_cuda" = true ] ; then +if [ "$install_tools" = true ] ; then + $script_dir/deploy-cuda.sh if [[ "$OSTYPE" == "darwin"* ]]; then - echo "Unable to provide CUDA on macOS" + echo "Unable to provide tools on macOS, please wait for a future script update or do not put -InstallCUDA command line flag to continue" else - # Download and install CUDA if [[ $(cut -f2 <<< $(lsb_release -r)) == "18.04" ]]; then - wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb - sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub - sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb - wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb - sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb - sudo apt update + sudo apt-get update + sudo apt-get install git ninja-build build-essential g++ nasm yasm + sudo apt-get install apt-transport-https ca-certificates gnupg 
software-properties-common wget + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null + sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' + wget -q https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb + sudo dpkg -i packages-microsoft-prod.deb + sudo add-apt-repository universe + sudo apt-get update sudo apt-get dist-upgrade -y - sudo apt-get install -y --no-install-recommends cuda-compiler-10-2 cuda-libraries-dev-10-2 cuda-driver-dev-10-2 cuda-cudart-dev-10-2 cuda-curand-dev-10-2 - sudo apt-get install -y --no-install-recommends libcudnn7-dev - sudo rm -rf /usr/local/cuda - sudo ln -s /usr/local/cuda-10.2 /usr/local/cuda + sudo apt-get install -y cmake + sudo apt-get install -y powershell + if [ "$bypass_driver_installation" = true ] ; then + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + fi export PATH=/usr/local/cuda/bin:$PATH export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH export CUDACXX=/usr/local/cuda/bin/nvcc export CUDA_PATH=/usr/local/cuda export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda - features="full" + cuda_is_available=true elif [[ $(cut -f2 <<< $(lsb_release -r)) == "20.04" ]]; then - sudo apt update + sudo apt-get update + sudo apt-get install git ninja-build build-essential g++ nasm yasm + sudo apt-get install apt-transport-https ca-certificates gnupg software-properties-common wget + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null + sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main' + wget -q 
https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb + sudo dpkg -i packages-microsoft-prod.deb + sudo add-apt-repository universe + sudo apt-get update sudo apt-get dist-upgrade -y - #sudo apt-get install -y --no-install-recommends nvidia-cuda-dev nvidia-cuda-toolkit - sudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" - sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/ /" - sudo apt-get install -y --no-install-recommends cuda-compiler-11-2 cuda-libraries-dev-11-2 cuda-driver-dev-11-2 cuda-cudart-dev-11-2 - sudo apt-get install -y --no-install-recommends libcudnn8-dev - sudo rm -rf /usr/local/cuda - sudo ln -s /usr/local/cuda-11.2 /usr/local/cuda + sudo apt-get install -y cmake + sudo apt-get install -y powershell + if [ "$bypass_driver_installation" = true ] ; then + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/stubs/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so.1 + sudo ln -s /usr/local/cuda-11.2/lib64/stubs/libcuda.so /usr/local/cuda-11.2/lib64/libcuda.so + fi export PATH=/usr/local/cuda/bin:$PATH export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH export CUDACXX=/usr/local/cuda/bin/nvcc export CUDA_PATH=/usr/local/cuda export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda - features="full" + cuda_is_available=true else - echo "Unable to auto-install CUDA on this Linux OS" - features="opencv-base" + echo "Unable to provide tools on macOS, please wait for a future script update or do not put -InstallCUDA command line flag to continue" fi fi -else 
- if [[ -v CUDA_PATH ]]; then - features="full" - else - features="opencv-base" - fi fi cd .. -rm -rf $temp_folder +rm -rf "$temp_folder" -if [[ ! -v VCPKG_ROOT ]]; then - git clone https://github.com/microsoft/vcpkg - cd vcpkg - ./bootstrap-vcpkg.sh -disableMetrics - export VCPKG_ROOT=$(pwd) +if [[ -v CUDA_PATH ]]; then + ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -DisableInteractive -DoNotUpdateDARKNET + #./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN -EnableOPENCV_CUDA -DisableInteractive -DoNotUpdateDARKNET +else + ./build.ps1 -UseVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET fi - -$VCPKG_ROOT/vcpkg install darknet[${features}] diff --git a/scripts/windows/win_install_cygwin.cmd b/scripts/windows/win_install_cygwin.cmd deleted file mode 100644 index 6a2bda4a812..00000000000 --- a/scripts/windows/win_install_cygwin.cmd +++ /dev/null @@ -1,12 +0,0 @@ -echo Download file: https://www.cygwin.com/setup-x86_64.exe - - -setup-x86_64.exe -q -P dos2unix,wget,tar,untar,gzip,unzip,qawk,bzip2,git,vim,gcc-g++,make,grep,sed,find - - -# wget rawgit.com/transcode-open/apt-cyg/master/apt-cyg -# install apt-cyg /bin - -echo Finished - -pause \ No newline at end of file diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c index eeba5cc57b9..6729b031923 100644 --- a/src/batchnorm_layer.c +++ b/src/batchnorm_layer.c @@ -36,6 +36,12 @@ layer make_batchnorm_layer(int batch, int w, int h, int c, int train) layer.rolling_mean = (float*)xcalloc(c, sizeof(float)); layer.rolling_variance = (float*)xcalloc(c, sizeof(float)); + layer.mean_delta = (float*)xcalloc(c, sizeof(float)); + layer.variance_delta = (float*)xcalloc(c, sizeof(float)); + + layer.x = (float*)xcalloc(layer.batch*layer.outputs, sizeof(float)); + layer.x_norm = (float*)xcalloc(layer.batch*layer.outputs, sizeof(float)); + layer.forward = forward_batchnorm_layer; layer.backward = backward_batchnorm_layer; layer.update = update_batchnorm_layer; diff --git a/src/blas.h 
b/src/blas.h index ab888903b54..b69a702fa93 100644 --- a/src/blas.h +++ b/src/blas.h @@ -174,6 +174,9 @@ void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, floa void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size); void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type); +void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu); +void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float *delta_gpu); + #endif // GPU #ifdef __cplusplus } diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index bc327995eed..85c55adfb82 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -2435,4 +2435,39 @@ extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int coord_conv_kernel << > > (dst, w, h, chan, b, type); CHECK_CUDA(cudaPeekAtLastError()); -} \ No newline at end of file +} + + +__global__ void forward_implicit_kernel(int size, int batch, int nweights, float *weight_gpu, float *output_gpu) +{ + const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + + output_gpu[id] = weight_gpu[id % nweights]; +} + +extern "C" void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu) +{ + int size = batch * nweights; + forward_implicit_kernel << > > (size, batch, nweights, weight_gpu, output_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} + + + +__global__ void backward_implicit_kernel(int size, int batch, int nweights, float *weight_updates_gpu, float *delta_gpu) +{ + const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + + for (int i = 0; i < batch; ++i) { + weight_updates_gpu[id] += delta_gpu[id + i * nweights]; + } +} + +extern "C" void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float 
*delta_gpu) +{ + int size = nweights; + backward_implicit_kernel << > > (size, batch, nweights, weight_updates_gpu, delta_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index ddd140c3088..1e9cdd9c739 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -165,6 +165,8 @@ half *cuda_make_f16_from_f32_array(float *src, size_t n) void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) { + if (l.train == 0) state.train = 0; + if (l.stream >= 0) { switch_stream(l.stream); } @@ -1219,8 +1221,8 @@ void pull_convolutional_layer(convolutional_layer l) { cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); cuda_pull_array_async(l.biases_gpu, l.biases, l.n); - cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); - cuda_pull_array_async(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.weight_updates_gpu) cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); + if (l.bias_updates_gpu) cuda_pull_array_async(l.bias_updates_gpu, l.bias_updates, l.n); if (l.batch_normalize){ cuda_pull_array_async(l.scales_gpu, l.scales, l.n); cuda_pull_array_async(l.rolling_mean_gpu, l.rolling_mean, l.n); diff --git a/src/csharp/CMakeLists.txt b/src/csharp/CMakeLists.txt new file mode 100644 index 00000000000..1b591a16dab --- /dev/null +++ b/src/csharp/CMakeLists.txt @@ -0,0 +1,19 @@ + +project(YoloCSharpWrapper LANGUAGES CSharp) +include(CSharpUtilities) + +add_library(${PROJECT_NAME} + ${PROJECT_NAME}.cs +) + +target_link_libraries(${PROJECT_NAME} PRIVATE dark) + +set_property(TARGET ${PROJECT_NAME} PROPERTY VS_DOTNET_REFERENCES + "System" + "System.Runtime.InteropServices" +) + +install(TARGETS ${PROJECT_NAME} + RUNTIME DESTINATION "${INSTALL_BIN_DIR}" + COMPONENT dev +) diff --git a/src/csharp/YoloCSharpWrapper.cs b/src/csharp/YoloCSharpWrapper.cs new file mode 100644 index 00000000000..35d23a9c2e1 --- /dev/null +++ 
b/src/csharp/YoloCSharpWrapper.cs @@ -0,0 +1,89 @@ +using System; +using System.Runtime.InteropServices; + +namespace Darknet +{ + public class YoloWrapper : IDisposable + { + private const string YoloLibraryName = "darknet.dll"; + private const int MaxObjects = 1000; + + [DllImport(YoloLibraryName, EntryPoint = "init")] + private static extern int InitializeYolo(string configurationFilename, string weightsFilename, int gpu); + + [DllImport(YoloLibraryName, EntryPoint = "detect_image")] + private static extern int DetectImage(string filename, ref BboxContainer container); + + [DllImport(YoloLibraryName, EntryPoint = "detect_mat")] + private static extern int DetectImage(IntPtr pArray, int nSize, ref BboxContainer container); + + [DllImport(YoloLibraryName, EntryPoint = "dispose")] + private static extern int DisposeYolo(); + + [StructLayout(LayoutKind.Sequential)] + public struct bbox_t + { + public UInt32 x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box + public float prob; // confidence - probability that the object was found correctly + public UInt32 obj_id; // class of object - from range [0, classes-1] + public UInt32 track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) + public UInt32 frames_counter; + public float x_3d, y_3d, z_3d; // 3-D coordinates, if there is used 3D-stereo camera + }; + + [StructLayout(LayoutKind.Sequential)] + public struct BboxContainer + { + [MarshalAs(UnmanagedType.ByValArray, SizeConst = MaxObjects)] + public bbox_t[] candidates; + } + + public YoloWrapper(string configurationFilename, string weightsFilename, int gpu) + { + InitializeYolo(configurationFilename, weightsFilename, gpu); + } + + public void Dispose() + { + DisposeYolo(); + } + + public bbox_t[] Detect(string filename) + { + var container = new BboxContainer(); + var count = DetectImage(filename, ref container); + + return container.candidates; + } + + public bbox_t[] Detect(byte[] imageData) + { + var container = 
new BboxContainer(); + + var size = Marshal.SizeOf(imageData[0]) * imageData.Length; + var pnt = Marshal.AllocHGlobal(size); + + try + { + // Copy the array to unmanaged memory. + Marshal.Copy(imageData, 0, pnt, imageData.Length); + var count = DetectImage(pnt, imageData.Length, ref container); + if (count == -1) + { + throw new NotSupportedException($"{YoloLibraryName} has no OpenCV support"); + } + } + catch (Exception exception) + { + return null; + } + finally + { + // Free the unmanaged memory. + Marshal.FreeHGlobal(pnt); + } + + return container.candidates; + } + } +} diff --git a/src/dark_cuda.c b/src/dark_cuda.c index 30509c51ee4..ceb43c8878b 100644 --- a/src/dark_cuda.c +++ b/src/dark_cuda.c @@ -123,8 +123,11 @@ cudaStream_t get_cuda_stream() { int i = cuda_get_device(); if (!streamInit[i]) { printf("Create CUDA-stream - %d \n", i); - //cudaError_t status = cudaStreamCreate(&streamsArray[i], cudaStreamNonBlocking); +#ifdef CUDNN cudaError_t status = cudaStreamCreateWithFlags(&streamsArray[i], cudaStreamNonBlocking); +#else + cudaError_t status = cudaStreamCreate(&streamsArray[i]); +#endif if (status != cudaSuccess) { printf(" cudaStreamCreate error: %d \n", status); const char *s = cudaGetErrorString(status); @@ -236,6 +239,35 @@ static int switchCudnnInit[16]; #endif +void cublas_check_error(cublasStatus_t status) +{ +#if defined(DEBUG) || defined(CUDA_DEBUG) + cudaDeviceSynchronize(); +#endif + if (cuda_debug_sync) { + cudaDeviceSynchronize(); + } + if (status != CUBLAS_STATUS_SUCCESS) { + printf("cuBLAS Error\n"); + } +} + +void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time) +{ + if (status != CUBLAS_STATUS_SUCCESS) { + printf("\n cuBLAS status Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); + } +#if defined(DEBUG) || defined(CUDA_DEBUG) + cuda_debug_sync = 1; +#endif + if (cuda_debug_sync) { + cudaError_t status = cudaDeviceSynchronize(); + if (status != CUDA_SUCCESS) 
+ printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); + } + cublas_check_error(status); +} + static int blasInit[16] = { 0 }; static cublasHandle_t blasHandle[16]; @@ -243,9 +275,9 @@ cublasHandle_t blas_handle() { int i = cuda_get_device(); if (!blasInit[i]) { - cublasCreate(&blasHandle[i]); + CHECK_CUBLAS(cublasCreate(&blasHandle[i])); cublasStatus_t status = cublasSetStream(blasHandle[i], get_cuda_stream()); - CHECK_CUDA((cudaError_t)status); + CHECK_CUBLAS(status); blasInit[i] = 1; } return blasHandle[i]; diff --git a/src/dark_cuda.h b/src/dark_cuda.h index 10f6d89e385..9251e877672 100644 --- a/src/dark_cuda.h +++ b/src/dark_cuda.h @@ -56,7 +56,9 @@ extern "C" { #endif // __cplusplus void check_error(cudaError_t status); void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time); + void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time); #define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); +#define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); cublasHandle_t blas_handle(); void free_pinned_memory(); diff --git a/src/demo.c b/src/demo.c index d503aa7fcfc..ad1cc6e041c 100644 --- a/src/demo.c +++ b/src/demo.c @@ -36,6 +36,7 @@ static float demo_thresh = 0; static int demo_ext_output = 0; static long long int frame_id = 0; static int demo_json_port = -1; +static bool demo_skip_frame = false; static int avg_frames; @@ -59,6 +60,8 @@ void *fetch_in_thread(void *ptr) while (!custom_atomic_load_int(&flag_exit)) { while (!custom_atomic_load_int(&run_fetch_in_thread)) { if (custom_atomic_load_int(&flag_exit)) return 0; + if (demo_skip_frame) + consume_frame(cap); this_thread_yield(); } int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on 
video-stream @@ -168,9 +171,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int if(filename){ printf("video file: %s\n", filename); cap = get_capture_video_stream(filename); + demo_skip_frame = is_live_stream(filename); }else{ printf("Webcam index: %d\n", cam_index); cap = get_capture_webcam(cam_index); + demo_skip_frame = true; } if (!cap) { diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index bfef6991634..bd99a89dc6b 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -51,7 +51,8 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m l.outputs = h*w*n*(classes + 8 + 1); l.inputs = l.outputs; l.max_boxes = max_boxes; - l.truths = l.max_boxes*(4 + 1); + l.truth_size = 4 + 2; + l.truths = l.max_boxes*l.truth_size; l.delta = (float*)calloc(batch*l.outputs, sizeof(float)); l.output = (float*)calloc(batch*l.outputs, sizeof(float)); for(i = 0; i < total*2; ++i){ @@ -464,8 +465,8 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) float best_iou = 0; int best_t = 0; for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + box truth = float_to_box_stride(state.truth + t*l.truth_size + b*l.truths, 1); + int class_id = state.truth[t*l.truth_size + b*l.truths + 4]; if (class_id >= l.classes) { printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. 
In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); printf(" truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id); @@ -496,7 +497,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if (l.objectness_smooth) { l.delta[obj_index] = l.obj_normalizer * (iou_multiplier - l.output[obj_index]); - int class_id = state.truth[best_match_t*(4 + 1) + b*l.truths + 4]; + int class_id = state.truth[best_match_t*l.truth_size + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); } @@ -518,19 +519,19 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) else l.delta[obj_index] = l.obj_normalizer * (1 - l.output[obj_index]); //l.delta[obj_index] = l.obj_normalizer * (1 - l.output[obj_index]); - int class_id = state.truth[best_t*(4 + 1) + b*l.truths + 4]; + int class_id = state.truth[best_t*l.truth_size + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); const float class_multiplier = (l.classes_multipliers) ? 
l.classes_multipliers[class_id] : 1.0f; if (l.objectness_smooth) l.delta[class_index + stride*class_id] = class_multiplier * (iou_multiplier - l.output[class_index + stride*class_id]); - box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); + box truth = float_to_box_stride(state.truth + best_t*l.truth_size + b*l.truths, 1); delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta); } } } } for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); + box truth = float_to_box_stride(state.truth + t*l.truth_size + b*l.truths, 1); if(!truth.x) break; float best_iou = 0; @@ -564,7 +565,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if(mask_n >= 0){ - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + int class_id = state.truth[t*l.truth_size + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); @@ -597,7 +598,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) // iou, n if (iou > l.iou_thresh) { - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + int class_id = state.truth[t*l.truth_size + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); diff --git a/src/gemm.c b/src/gemm.c index 519751c0622..5f5c9689c67 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -8,13 +8,39 @@ #include #include #include -#ifdef _WIN32 -#include -#endif #if defined(_OPENMP) #include #endif +#if defined(_MSC_VER) +#if defined(_M_ARM) || defined(_M_ARM64) +static inline uint32_t popcnt(uint32_t v) { + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 
0x33333333); + return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; +} +#define POPCNT(x) popcnt((x)) +#define POPCNT64(x) (popcnt((unsigned)(x)) + popcnt((unsigned)((uint64_t)(x) >> 32))) +#else +#include +#ifdef _WIN64 +#define POPCNT(x) __popcnt(x) +#define POPCNT64(x) __popcnt64(x) +#else +static inline int popcnt_64(uint64_t val64) { + int tmp_count = __popcnt(val64); + tmp_count += __popcnt(val64 >> 32); + return tmp_count; +} +#define POPCNT(x) __popcnt(x) +#define POPCNT64(x) popcnt_64(x) +#endif +#endif +#elif defined(__GNUC__) +#define POPCNT(x) __builtin_popcount(x) +#define POPCNT64(x) __builtin_popcountll(x) +#endif + #define TILE_M 4 // 4 ops #define TILE_N 16 // AVX2 = 2 ops * 8 floats #define TILE_K 16 // loop @@ -230,7 +256,7 @@ void gemm_nn_custom_bin_mean(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (k_ldb + j) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); //printf("\n %d \n",__builtin_popcountll(c_bit64)); // gcc - printf("\n %d \n", __popcnt64(c_bit64)); // msvs + printf("\n %d \n", POPCNT64(c_bit64)); // msvs int h; for (h = 0; h < 64; ++h) @@ -298,11 +324,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); -#ifdef WIN32 - int tmp_count = __popcnt64(c_bit64); -#else - int tmp_count = __builtin_popcountll(c_bit64); -#endif + int tmp_count = POPCNT64(c_bit64); if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits count += tmp_count; @@ -503,17 +525,7 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, } } -static inline int popcnt_32(uint32_t val32) { -#ifdef WIN32 // Windows MSVS - int tmp_count = __popcnt(val32); -#else // Linux GCC - int tmp_count = __builtin_popcount(val32); -#endif - return tmp_count; -} -//---------------------------- - -#if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && 
!defined(__MINGW32__)) +#if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && !defined(__MINGW32__) && !defined(_M_ARM64)) #if (defined(_WIN64) && !defined(__MINGW64__)) #include @@ -925,14 +937,14 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, // waiting for - CPUID Flags: AVX512VPOPCNTDQ: __m512i _mm512_popcnt_epi32(__m512i a) __m256 count = _mm256_setr_ps( - popcnt_32(_mm256_extract_epi32(xnor256, 0)), - popcnt_32(_mm256_extract_epi32(xnor256, 1)), - popcnt_32(_mm256_extract_epi32(xnor256, 2)), - popcnt_32(_mm256_extract_epi32(xnor256, 3)), - popcnt_32(_mm256_extract_epi32(xnor256, 4)), - popcnt_32(_mm256_extract_epi32(xnor256, 5)), - popcnt_32(_mm256_extract_epi32(xnor256, 6)), - popcnt_32(_mm256_extract_epi32(xnor256, 7))); + POPCNT(_mm256_extract_epi32(xnor256, 0)), + POPCNT(_mm256_extract_epi32(xnor256, 1)), + POPCNT(_mm256_extract_epi32(xnor256, 2)), + POPCNT(_mm256_extract_epi32(xnor256, 3)), + POPCNT(_mm256_extract_epi32(xnor256, 4)), + POPCNT(_mm256_extract_epi32(xnor256, 5)), + POPCNT(_mm256_extract_epi32(xnor256, 6)), + POPCNT(_mm256_extract_epi32(xnor256, 7))); __m256 val2 = _mm256_set1_ps(2); count = _mm256_mul_ps(count, val2); // count * 2 @@ -952,7 +964,7 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, { PUT_IN_REGISTER uint32_t B_PART = B[s*ldb + j]; uint32_t xnor_result = ~(A_PART ^ B_PART); - int32_t count = popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int C[i*ldc + j] += (2 * count - 32) * mean_val; } @@ -1140,13 +1152,7 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, static inline int popcnt128(__m128i n) { const __m128i n_hi = _mm_unpackhi_epi64(n, n); -#if defined(_MSC_VER) - return __popcnt64(_mm_cvtsi128_si64(n)) + __popcnt64(_mm_cvtsi128_si64(n_hi)); -#elif defined(__APPLE__) && defined(__clang__) - return _mm_popcnt_u64(_mm_cvtsi128_si64(n)) + _mm_popcnt_u64(_mm_cvtsi128_si64(n_hi)); -#else - 
return __popcntq(_mm_cvtsi128_si64(n)) + __popcntq(_mm_cvtsi128_si64(n_hi)); -#endif + return POPCNT64(_mm_cvtsi128_si64(n)) + POPCNT64(_mm_cvtsi128_si64(n_hi)); } static inline int popcnt256(__m256i n) { @@ -2021,7 +2027,7 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, PUT_IN_REGISTER uint32_t B_PART = B[s * ldb + j]; uint32_t xnor_result = ~(A_PART ^ B_PART); //printf(" xnor_result = %d, ", xnor_result); - int32_t count = popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int C[i*ldc + j] += (2 * count - 32) * mean_val; //c[i*n + j] += count*mean; @@ -2079,25 +2085,6 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, } } -static inline int popcnt_64(uint64_t val64) { -#ifdef WIN32 // Windows -#ifdef _WIN64 // Windows 64-bit - int tmp_count = __popcnt64(val64); -#else // Windows 32-bit - int tmp_count = __popcnt(val64); - tmp_count += __popcnt(val64 >> 32); -#endif -#else // Linux -#if defined(__x86_64__) || defined(__aarch64__) // Linux 64-bit - int tmp_count = __builtin_popcountll(val64); -#else // Linux 32-bit - int tmp_count = __builtin_popcount(val64); - tmp_count += __builtin_popcount(val64 >> 32); -#endif -#endif - return tmp_count; -} - void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, unsigned char *A, int lda, unsigned char *B, int ldb, @@ -2118,7 +2105,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); - int tmp_count = popcnt_64(c_bit64); + int tmp_count = POPCNT64(c_bit64); if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits count += tmp_count; @@ -2518,7 +2505,7 @@ void gemm_nn_bin_transposed_32bit_packed(int M, int N, int K, float ALPHA, PUT_IN_REGISTER uint32_t A_PART = ((uint32_t*)A)[i*lda + s]; PUT_IN_REGISTER uint32_t B_PART = ((uint32_t*)B)[j * ldb 
+ s]; uint32_t xnor_result = ~(A_PART ^ B_PART); - int32_t count = popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int val += (2 * count - 32) * mean_val; } @@ -2581,7 +2568,7 @@ void convolution_repacked(uint32_t *packed_input, uint32_t *packed_weights, floa uint32_t weight = ((uint32_t *)packed_weights)[fil*new_lda / 32 + chan*size*size + f_y*size + f_x]; uint32_t xnor_result = ~(input ^ weight); - int32_t count = popcnt_32(xnor_result); // mandatory Signed int + int32_t count = POPCNT(xnor_result); // mandatory Signed int sum += (2 * count - 32) * mean_val; } } diff --git a/src/http_stream.cpp b/src/http_stream.cpp index 3ec7e851593..b17edfb5d36 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -70,8 +70,12 @@ static int close_socket(SOCKET s) { #define SOCKADDR struct sockaddr #define SOCKADDR_IN struct sockaddr_in #define ADDRPOINTER unsigned int* +#ifndef INVALID_SOCKET #define INVALID_SOCKET -1 +#endif +#ifndef SOCKET_ERROR #define SOCKET_ERROR -1 +#endif struct _IGNORE_PIPE_SIGNAL { struct sigaction new_actn, old_actn; @@ -934,4 +938,3 @@ void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim } } } - diff --git a/src/httplib.h b/src/httplib.h index 41fbfb19419..e4678faeeb2 100644 --- a/src/httplib.h +++ b/src/httplib.h @@ -126,7 +126,10 @@ using socket_t = SOCKET; #include using socket_t = int; -#define INVALID_SOCKET (-1) + +#ifndef INVALID_SOCKET +#define INVALID_SOCKET -1 +#endif #endif //_WIN32 #include diff --git a/src/image.c b/src/image.c index ef238d9d81f..e918af04517 100644 --- a/src/image.c +++ b/src/image.c @@ -1355,7 +1355,7 @@ void make_image_red(image im) } } -image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c) +image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c, float alpha) { image attention_img; attention_img.w = w; @@ 
-1383,7 +1383,7 @@ image make_attention_image(int img_size, float *original_delta_cpu, float *origi image resized = resize_image(attention_img, w / 4, h / 4); attention_img = resize_image(resized, w, h); free_image(resized); - for (k = 0; k < img_size; ++k) attention_img.data[k] += original_input_cpu[k]; + for (k = 0; k < img_size; ++k) attention_img.data[k] = attention_img.data[k]*alpha + (1-alpha)*original_input_cpu[k]; //normalize_image(attention_img); //show_image(attention_img, "delta"); diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index add27c96ef0..2524f1593ca 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -385,6 +385,17 @@ extern "C" void resize_window_cv(char const* window_name, int width, int height) } // ---------------------------------------- +extern "C" void move_window_cv(char const* window_name, int x, int y) +{ + try { + cv::moveWindow(window_name, x, y); + } + catch (...) { + cerr << "OpenCV exception: create_window_cv \n"; + } +} +// ---------------------------------------- + extern "C" void destroy_all_windows_cv() { try { @@ -834,6 +845,15 @@ extern "C" image get_image_from_stream_letterbox(cap_cv *cap, int w, int h, int } // ---------------------------------------- +extern "C" void consume_frame(cap_cv *cap){ + cv::Mat *src = NULL; + src = (cv::Mat *)get_capture_frame_cv(cap); + if (src) + delete src; +} +// ---------------------------------------- + + // ==================================================================== // Image Saving // ==================================================================== diff --git a/src/image_opencv.h b/src/image_opencv.h index 6fa6cb5c6b0..19d16e1d9b5 100644 --- a/src/image_opencv.h +++ b/src/image_opencv.h @@ -48,6 +48,7 @@ image mat_to_image_cv(mat_cv *mat); // Window void create_window_cv(char const* window_name, int full_screen, int width, int height); void resize_window_cv(char const* window_name, int width, int height); +void move_window_cv(char const* window_name, int 
x, int y); void destroy_all_windows_cv(); int wait_key_cv(int delay); int wait_until_press_key_cv(); @@ -83,7 +84,7 @@ int set_capture_position_frame_cv(cap_cv *cap, int index); image get_image_from_stream_cpp(cap_cv *cap); image get_image_from_stream_resize(cap_cv *cap, int w, int h, int c, mat_cv** in_img, int dont_close); image get_image_from_stream_letterbox(cap_cv *cap, int w, int h, int c, mat_cv** in_img, int dont_close); - +void consume_frame(cap_cv *cap); // Image Saving void save_cv_png(mat_cv *img, const char *name); diff --git a/src/network.c b/src/network.c index 600be1b2d7d..c41932479fd 100644 --- a/src/network.c +++ b/src/network.c @@ -273,7 +273,7 @@ void forward_network(network net, network_state state) for(i = 0; i < net.n; ++i){ state.index = i; layer l = net.layers[i]; - if(l.delta && state.train){ + if(l.delta && state.train && l.train){ scal_cpu(l.outputs * l.batch, 0, l.delta, 1); } //double time = get_time_point(); @@ -297,6 +297,7 @@ void update_network(network net) float rate = get_current_rate(net); for(i = 0; i < net.n; ++i){ layer l = net.layers[i]; + if (l.train == 0) continue; if(l.update){ l.update(l, update_batch, rate, net.momentum, net.decay); } @@ -1458,6 +1459,7 @@ void copy_weights_net(network net_train, network *net_map) } net_map->layers[k].batch = 1; net_map->layers[k].steps = 1; + net_map->layers[k].train = 0; } } diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 9f4c640b3e9..ac3403cf547 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -235,12 +235,25 @@ void backward_network_gpu(network net, network_state state) cuda_pull_array(original_input, original_input_cpu, img_size); cuda_pull_array(original_delta, original_delta_cpu, img_size); - image attention_img = make_attention_image(img_size, original_delta_cpu, original_input_cpu, net.w, net.h, net.c); + image attention_img = make_attention_image(img_size, original_delta_cpu, original_input_cpu, net.w, net.h, net.c, 0.7); 
show_image(attention_img, "attention_img"); resize_window_cv("attention_img", 500, 500); + //static int img_counter = 0; + //img_counter++; + //char buff[256]; + //sprintf(buff, "attention_img_%d.png", img_counter); + //save_image_png(attention_img, buff); free_image(attention_img); + image attention_mask_img = make_attention_image(img_size, original_delta_cpu, original_delta_cpu, net.w, net.h, net.c, 1.0); + show_image(attention_mask_img, "attention_mask_img"); + resize_window_cv("attention_mask_img", 500, 500); + + //sprintf(buff, "attention_mask_img_%d.png", img_counter); + //save_image_png(attention_mask_img, buff); + free_image(attention_mask_img); + free(original_input_cpu); free(original_delta_cpu); } @@ -271,6 +284,8 @@ void update_network_gpu(network net) float rate = get_current_rate(net); for(i = 0; i < net.n; ++i){ layer l = net.layers[i]; + if (l.train == 0) continue; + l.t = get_current_batch(net); if (iteration_num > (net.max_batches * 1 / 2)) l.deform = 0; if (l.burnin_update && (l.burnin_update*net.burn_in > iteration_num)) continue; diff --git a/src/parser.c b/src/parser.c index e7498d9daee..1a345b5cc1b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -39,6 +39,11 @@ #include "version.h" #include "yolo_layer.h" #include "gaussian_yolo_layer.h" +#include "representation_layer.h" + +void empty_func(dropout_layer l, network_state state) { + //l.output_gpu = state.input; +} typedef struct{ char *type; @@ -90,7 +95,9 @@ LAYER_TYPE string_to_layer_type(char * type) if (strcmp(type, "[contrastive]") == 0) return CONTRASTIVE; if (strcmp(type, "[route]")==0) return ROUTE; if (strcmp(type, "[upsample]") == 0) return UPSAMPLE; - if (strcmp(type, "[empty]") == 0) return EMPTY; + if (strcmp(type, "[empty]") == 0 + || strcmp(type, "[silence]") == 0) return EMPTY; + if (strcmp(type, "[implicit]") == 0) return IMPLICIT; return BLANK; } @@ -1036,6 +1043,17 @@ layer parse_sam(list *options, size_params params, network net) return s; } +layer parse_implicit(list 
*options, size_params params, network net) +{ + float mean_init = option_find_float(options, "mean", 0.0); + float std_init = option_find_float(options, "std", 0.2); + int filters = option_find_int(options, "filters", 128); + int atoms = option_find_int_quiet(options, "atoms", 1); + + layer s = make_implicit_layer(params.batch, params.index, mean_init, std_init, filters, atoms); + + return s; +} layer parse_activation(list *options, size_params params) { @@ -1348,8 +1366,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) net.gpu_index = gpu_index; size_params params; - if (batch > 0) params.train = 0; // allocates memory for Detection only - else params.train = 1; // allocates memory for Detection & Training + if (batch > 0) params.train = 0; // allocates memory for Inference only + else params.train = 1; // allocates memory for Inference & Training section *s = (section *)n->val; list *options = s->options; @@ -1377,6 +1395,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) params.net = net; printf("mini_batch = %d, batch = %d, time_steps = %d, train = %d \n", net.batch, net.batch * net.subdivisions, net.time_steps, params.train); + int last_stop_backward = -1; int avg_outputs = 0; int avg_counter = 0; float bflops = 0; @@ -1390,8 +1409,32 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) n = n->next; int count = 0; free_section(s); + + // find l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + node *n_tmp = n; + int count_tmp = 0; + if (params.train == 1) { + while (n_tmp) { + s = (section *)n_tmp->val; + options = s->options; + int stopbackward = option_find_int_quiet(options, "stopbackward", 0); + if (stopbackward == 1) { + last_stop_backward = count_tmp; + printf("last_stop_backward = %d \n", last_stop_backward); + } + n_tmp = n_tmp->next; + ++count_tmp; + } + } + + int old_params_train = params.train; + fprintf(stderr, " layer filters size/strd(dil) 
input output\n"); while(n){ + + params.train = old_params_train; + if (count < last_stop_backward) params.train = 0; + params.index = count; fprintf(stderr, "%4d ", count); s = (section *)n->val; @@ -1460,7 +1503,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) int k; for (k = 0; k < l.n; ++k) { net.layers[l.input_layers[k]].use_bin_output = 0; - net.layers[l.input_layers[k]].keep_delta_gpu = 1; + if (count >= last_stop_backward) + net.layers[l.input_layers[k]].keep_delta_gpu = 1; } }else if (lt == UPSAMPLE) { l = parse_upsample(options, params, net); @@ -1468,7 +1512,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) l = parse_shortcut(options, params, net); net.layers[count - 1].use_bin_output = 0; net.layers[l.index].use_bin_output = 0; - net.layers[l.index].keep_delta_gpu = 1; + if (count >= last_stop_backward) + net.layers[l.index].keep_delta_gpu = 1; }else if (lt == SCALE_CHANNELS) { l = parse_scale_channels(options, params, net); net.layers[count - 1].use_bin_output = 0; @@ -1480,6 +1525,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) net.layers[count - 1].use_bin_output = 0; net.layers[l.index].use_bin_output = 0; net.layers[l.index].keep_delta_gpu = 1; + } else if (lt == IMPLICIT) { + l = parse_implicit(options, params, net); }else if(lt == DROPOUT){ l = parse_dropout(options, params); l.output = net.layers[count-1].output; @@ -1492,16 +1539,25 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) } else if (lt == EMPTY) { layer empty_layer = {(LAYER_TYPE)0}; - empty_layer.out_w = params.w; - empty_layer.out_h = params.h; - empty_layer.out_c = params.c; l = empty_layer; + l.type = EMPTY; + l.w = l.out_w = params.w; + l.h = l.out_h = params.h; + l.c = l.out_c = params.c; + l.batch = params.batch; + l.inputs = l.outputs = params.inputs; l.output = net.layers[count - 1].output; l.delta = net.layers[count - 1].delta; + l.forward = empty_func; 
+ l.backward = empty_func; #ifdef GPU l.output_gpu = net.layers[count - 1].output_gpu; l.delta_gpu = net.layers[count - 1].delta_gpu; + l.keep_delta_gpu = 1; + l.forward_gpu = empty_func; + l.backward_gpu = empty_func; #endif + fprintf(stderr, "empty \n"); }else{ fprintf(stderr, "Type not recognized: %s\n", s->type); } @@ -1561,7 +1617,12 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) #ifdef GPU // futher GPU-memory optimization: net.optimized_memory == 2 l.optimized_memory = net.optimized_memory; - if (net.optimized_memory >= 2 && params.train && l.type != DROPOUT) + if (net.optimized_memory == 1 && params.train && l.type != DROPOUT) { + if (l.delta_gpu) { + cuda_free(l.delta_gpu); + l.delta_gpu = NULL; + } + } else if (net.optimized_memory >= 2 && params.train && l.type != DROPOUT) { if (l.output_gpu) { cuda_free(l.output_gpu); @@ -1604,6 +1665,9 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); option_unused(options); + + if (l.stopbackward == 1) printf(" ------- previous layers are frozen ------- \n"); + net.layers[count] = l; if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; if (l.inputs > max_inputs) max_inputs = l.inputs; @@ -1632,6 +1696,27 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) avg_counter++; } } + + if (last_stop_backward > -1) { + int k; + for (k = 0; k < last_stop_backward; ++k) { + layer l = net.layers[k]; + if (l.keep_delta_gpu) { + if (!l.delta) { + net.layers[k].delta = (float*)xcalloc(l.outputs*l.batch, sizeof(float)); + } +#ifdef GPU + if (!l.delta_gpu) { + net.layers[k].delta_gpu = (float *)cuda_make_array(NULL, l.outputs*l.batch); + } +#endif + } + + net.layers[k].onlyforward = 1; + net.layers[k].train = 0; + } + } + free_list(sections); #ifdef GPU @@ -1657,6 +1742,9 @@ 
network parse_network_cfg_custom(char *filename, int batch, int time_steps) } l.delta_gpu = net.global_delta_gpu; } + else { + if (!l.delta_gpu) l.delta_gpu = (float *)cuda_make_array(NULL, l.outputs*l.batch); + } // maximum optimization if (net.optimized_memory >= 3 && l.type != DROPOUT) { @@ -1810,6 +1898,24 @@ void save_shortcut_weights(layer l, FILE *fp) fwrite(l.weights, sizeof(float), num, fp); } +void save_implicit_weights(layer l, FILE *fp) +{ +#ifdef GPU + if (gpu_index >= 0) { + pull_implicit_layer(l); + //printf("\n pull_implicit_layer \n"); + } +#endif + int i; + //if(l.weight_updates) for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]); + //printf(" l.nweights = %d - update \n", l.nweights); + //for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); + //printf(" l.nweights = %d \n\n", l.nweights); + + int num = l.nweights; + fwrite(l.weights, sizeof(float), num, fp); +} + void save_convolutional_weights(layer l, FILE *fp) { if(l.binary){ @@ -1921,6 +2027,8 @@ void save_weights_upto(network net, char *filename, int cutoff, int save_ema) } } if (l.type == SHORTCUT && l.nweights > 0) { save_shortcut_weights(l, fp); + } if (l.type == IMPLICIT) { + save_implicit_weights(l, fp); } if(l.type == CONNECTED){ save_connected_weights(l, fp); } if(l.type == BATCHNORM){ @@ -1976,6 +2084,7 @@ void save_weights_upto(network net, char *filename, int cutoff, int save_ema) fwrite(l.biases, sizeof(float), l.outputs, fp); fwrite(l.weights, sizeof(float), size, fp); } + fflush(fp); } fclose(fp); } @@ -2131,6 +2240,21 @@ void load_shortcut_weights(layer l, FILE *fp) #endif } +void load_implicit_weights(layer l, FILE *fp) +{ + int num = l.nweights; + int read_bytes; + read_bytes = fread(l.weights, sizeof(float), num, fp); + if (read_bytes > 0 && read_bytes < num) printf("\n Warning: Unexpected end of wights-file! 
l.weights - l.index = %d \n", l.index); + //for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); + //printf(" read_bytes = %d \n\n", read_bytes); +#ifdef GPU + if (gpu_index >= 0) { + push_implicit_layer(l); + } +#endif +} + void load_weights_upto(network *net, char *filename, int cutoff) { #ifdef GPU @@ -2175,6 +2299,9 @@ void load_weights_upto(network *net, char *filename, int cutoff) if (l.type == SHORTCUT && l.nweights > 0) { load_shortcut_weights(l, fp); } + if (l.type == IMPLICIT) { + load_implicit_weights(l, fp); + } if(l.type == CONNECTED){ load_connected_weights(l, fp, transpose); } diff --git a/src/region_layer.c b/src/region_layer.c index 7aa1a196f80..b7aba32e1a9 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -20,6 +20,10 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int l.batch = batch; l.h = h; l.w = w; + l.c = n*(classes + coords + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; l.classes = classes; l.coords = coords; l.cost = (float*)xcalloc(1, sizeof(float)); diff --git a/src/representation_layer.c b/src/representation_layer.c new file mode 100644 index 00000000000..fe7741fa9db --- /dev/null +++ b/src/representation_layer.c @@ -0,0 +1,159 @@ +#include "representation_layer.h" +#include "utils.h" +#include "dark_cuda.h" +#include "blas.h" +#include +#include + +layer make_implicit_layer(int batch, int index, float mean_init, float std_init, int filters, int atoms) +{ + fprintf(stderr,"implicit Layer: %d x %d \t mean=%.2f, std=%.2f \n", filters, atoms, mean_init, std_init); + layer l = { (LAYER_TYPE)0 }; + l.type = IMPLICIT; + l.batch = batch; + l.w = 1; + l.h = 1; + l.c = 1; + + l.out_w = 1; + l.out_h = atoms; + l.out_c = filters; + + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = 1; + l.index = index; + + l.nweights = l.out_w * l.out_h * l.out_c; + + l.weight_updates = (float*)xcalloc(l.nweights, sizeof(float)); + l.weights = (float*)xcalloc(l.nweights, sizeof(float)); + int i; + 
for (i = 0; i < l.nweights; ++i) l.weights[i] = mean_init + rand_uniform(-std_init, std_init); + + + l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float)); + l.output = (float*)xcalloc(l.outputs * batch, sizeof(float)); + + l.forward = forward_implicit_layer; + l.backward = backward_implicit_layer; + l.update = update_implicit_layer; +#ifdef GPU + l.forward_gpu = forward_implicit_layer_gpu; + l.backward_gpu = backward_implicit_layer_gpu; + l.update_gpu = update_implicit_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + l.weights_gpu = cuda_make_array(l.weights, l.nweights); +#endif + return l; +} + +void resize_implicit_layer(layer *l, int w, int h) +{ +} + +void forward_implicit_layer(const layer l, network_state state) +{ + int i; + #pragma omp parallel for + for (i = 0; i < l.nweights * l.batch; ++i) { + l.output[i] = l.weights[i % l.nweights]; + } +} + +void backward_implicit_layer(const layer l, network_state state) +{ + int i; + for (i = 0; i < l.nweights * l.batch; ++i) { + l.weight_updates[i % l.nweights] += l.delta[i]; + } +} + +void update_implicit_layer(layer l, int batch, float learning_rate_init, float momentum, float decay) +{ + float learning_rate = learning_rate_init*l.learning_rate_scale; + //float momentum = a.momentum; + //float decay = a.decay; + //int batch = a.batch; + + axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.nweights, learning_rate / batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.nweights, momentum, l.weight_updates, 1); + +} + + +#ifdef GPU +void forward_implicit_layer_gpu(const layer l, network_state state) +{ + forward_implicit_gpu(l.batch, l.nweights, l.weights_gpu, l.output_gpu); +} + +void backward_implicit_layer_gpu(const layer l, network_state state) +{ + backward_implicit_gpu(l.batch, l.nweights, l.weight_updates_gpu, 
l.delta_gpu); +} + +void update_implicit_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale) +{ + // Loss scale for Mixed-Precision on Tensor-Cores + float learning_rate = learning_rate_init*l.learning_rate_scale / loss_scale; + //float momentum = a.momentum; + //float decay = a.decay; + //int batch = a.batch; + + reset_nan_and_inf(l.weight_updates_gpu, l.nweights); + fix_nan_and_inf(l.weights_gpu, l.nweights); + + if (l.adam) { + //adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.nweights, batch, l.t); + } + else { + //axpy_ongpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + //axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + //scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_ongpu(l.nweights, -decay*batch*loss_scale, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + + scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); + } + + if (l.clip) { + constrain_ongpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + +void pull_implicit_layer(layer l) +{ + cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); + cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); + + if (l.adam) { + cuda_pull_array_async(l.m_gpu, l.m, l.nweights); + cuda_pull_array_async(l.v_gpu, l.v, l.nweights); + } + CHECK_CUDA(cudaPeekAtLastError()); + cudaStreamSynchronize(get_cuda_stream()); +} + +void push_implicit_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + + if (l.train) { + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + } + if (l.adam) { + cuda_push_array(l.m_gpu, l.m, l.nweights); + 
cuda_push_array(l.v_gpu, l.v, l.nweights); + } + CHECK_CUDA(cudaPeekAtLastError()); +} +#endif + + diff --git a/src/representation_layer.h b/src/representation_layer.h new file mode 100644 index 00000000000..8b2a9da3082 --- /dev/null +++ b/src/representation_layer.h @@ -0,0 +1,29 @@ +#ifndef REPRESENTATION_LAYER_H +#define REPRESENTATION_LAYER_H + +#include "layer.h" +#include "network.h" + +#ifdef __cplusplus +extern "C" { +#endif +layer make_implicit_layer(int batch, int index, float mean_init, float std_init, int filters, int atoms); +void forward_implicit_layer(const layer l, network_state state); +void backward_implicit_layer(const layer l, network_state state); +void update_implicit_layer(layer l, int batch, float learning_rate_init, float momentum, float decay); + +void resize_implicit_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_implicit_layer_gpu(const layer l, network_state state); +void backward_implicit_layer_gpu(const layer l, network_state state); + +void update_implicit_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale); +void pull_implicit_layer(layer l); +void push_implicit_layer(layer l); +#endif + +#ifdef __cplusplus +} +#endif +#endif // REPRESENTATION_LAYER_H diff --git a/src/utils.c b/src/utils.c index fe5c2062148..e4a2298e762 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1047,3 +1047,8 @@ unsigned long custom_hash(char *str) return hash; } + +bool is_live_stream(const char * path){ + const char *url_schema = "://"; + return (NULL != strstr(path, url_schema)); +} diff --git a/src/utils.h b/src/utils.h index 9a154ea6267..a217b425a2e 100644 --- a/src/utils.h +++ b/src/utils.h @@ -5,6 +5,7 @@ #include #include +#include #ifndef M_PI #define M_PI 3.14159265358979323846 // pi @@ -95,6 +96,7 @@ int max_int_index(int *a, int n); boxabs box_to_boxabs(const box* b, const int img_w, const int img_h, const int bounds_check); int make_directory(char *path, int mode); unsigned long 
custom_hash(char *str); +bool is_live_stream(const char * path); #define max_val_cmp(a,b) (((a) > (b)) ? (a) : (b)) #define min_val_cmp(a,b) (((a) < (b)) ? (a) : (b)) diff --git a/vcpkg.json b/vcpkg.json index a7f66f260fc..30caffb7f88 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,12 +1,11 @@ { "name": "darknet", - "version-string": "0.2.5.4", - "port-version": 2, - "homepage": "https://github.com/alexeyab/darknet", + "version-date": "2021-04-16", "description": "Darknet is an open source neural network framework written in C and CUDA. You only look once (YOLO) is a state-of-the-art, real-time object detection system, best example of darknet functionalities.", + "homepage": "https://github.com/alexeyab/darknet", "dependencies": [ - "stb", - "pthreads" + "pthreads", + "stb" ], "features": { "cuda": { @@ -22,6 +21,19 @@ "cudnn" ] }, + "full": { + "description": "Build darknet fully featured", + "dependencies": [ + { + "name": "darknet", + "features": [ + "cuda", + "cudnn", + "opencv-cuda" + ] + } + ] + }, "opencv-base": { "description": "Build darknet with support for latest version of OpenCV", "dependencies": [ @@ -55,7 +67,6 @@ { "name": "opencv2", "features": [ - "contrib", "ffmpeg" ] } @@ -67,7 +78,6 @@ { "name": "opencv2", "features": [ - "contrib", "cuda", "ffmpeg" ]