From 69601cd8b0860156a5fca0a61580bf10fe5aa367 Mon Sep 17 00:00:00 2001
From: acxz
Date: Mon, 10 Jun 2019 12:44:18 -0400
Subject: [PATCH 01/86] added cmake build instructions and reorganized some install instructions

---
 README.md | 48 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 52df11995a8..97b0bae824c 100644
--- a/README.md
+++ b/README.md
@@ -18,9 +18,12 @@ More details: http://pjreddie.com/darknet/yolo/
 0. [Improvements in this repository](#improvements-in-this-repository)
 1. [How to use](#how-to-use-on-the-command-line)
-2. [How to compile on Linux](#how-to-compile-on-linux)
+2. How to compile on Linux
+    * [Using cmake](#how-to-compile-on-linux-using-cmake)
+    * [Using make](#how-to-compile-on-linux-using-make)
 3. How to compile on Windows
    * [Using vcpkg](#how-to-compile-on-windows-using-vcpkg)
+   * [Using Cmake-GUI](#how-to-compile-on-windows-using-Cmake-GUI)
    * [Legacy way](#how-to-compile-on-windows-legacy-way)
 4. [How to train (Pascal VOC Data)](#how-to-train-pascal-voc-data)
 5. [How to train with multi-GPU:](#how-to-train-with-multi-gpu)
@@ -50,10 +53,6 @@ More details: http://pjreddie.com/darknet/yolo/
 * **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
 * on Linux **GCC or Clang**, on Windows **MSVC 2015/2017/2019** https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community
-Compiling on **Windows** by using `Cmake-GUI` as on this [**IMAGE**](https://user-images.githubusercontent.com/4096485/55107892-6becf380-50e3-11e9-9a0a-556a943c429a.png): Configure -> Optional platform for generator (Set: x64) -> Finish -> Generate -> Open Project -> x64 & Release -> Build -> Build solution
-
-Compiling on **Linux** by using command `make` (or alternative way by using command: `cmake . && make` )
-
 #### Pre-trained models
 There are weights-file for different cfg-files (smaller size -> faster speed & lower accuracy:
@@ -154,7 +153,23 @@ On Linux find executable file `./darknet` in the root directory, while on Window
 * Yolo v3 COCO-model: `darknet.exe detector demo data/coco.data yolov3.cfg yolov3.weights http://192.168.0.80:8080/video?dummy=param.mjpg -i 0`
-### How to compile on Linux
+### How to compile on Linux (using `cmake`)
+
+The `CMakeLists.txt` attempts to find installed optional dependencies such as
+CUDA, cuDNN, and ZED, and builds against them. It also creates a shared-object
+library file so that `darknet` can be used for code development.
+
+Inside the cloned repository:
+
+```
+mkdir build-release
+cd build-release
+cmake ..
+make
+make install
+```
+
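For reference, the same build written out with two common, purely optional conveniences: an explicit build type and a parallel compile. This is a minimal sketch using only generic CMake/make features (`CMAKE_BUILD_TYPE`, `make -j`), not options defined by this project's `CMakeLists.txt`:

```
mkdir build-release
cd build-release
cmake -DCMAKE_BUILD_TYPE=Release ..   # standard CMake variable: optimized build
make -j"$(nproc)"                     # compile in parallel on all CPU cores
sudo make install                     # optional: install system-wide
```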
+### How to compile on Linux (using `make`)

Just do `make` in the darknet directory. Before make, you can set such options in the `Makefile`: [link](https://github.com/AlexeyAB/darknet/blob/9c1b9a2cf6363546c152251be578a21f3c3caec6/Makefile#L1)
@@ -201,6 +216,19 @@ PS Code\vcpkg> .\vcpkg install pthreads opencv[ffmpeg] #replace with ope
 9. Open Powershell, go to the `darknet` folder and build with the command `.\build.ps1`. If you want to use Visual Studio, you will find two custom solutions created for you by CMake after the build, one in `build_win_debug` and the other in `build_win_release`, containing all the appropriate config flags for your system.
+### How to compile on Windows (using `Cmake-GUI`)
+
+Using `Cmake-GUI` as shown here on this [**IMAGE**](https://user-images.githubusercontent.com/4096485/55107892-6becf380-50e3-11e9-9a0a-556a943c429a.png):
+
+1. Configure
+2. Optional platform for generator (Set: x64)
+3. Finish
+4. Generate
+5. Open Project
+6. x64 & Release
+7. Build
+8. Build solution
+
 ### How to compile on Windows (legacy way)
 1. If you have **CUDA 10.0, cuDNN 7.4 and OpenCV 3.x** (with paths: `C:\opencv_3.0\opencv\build\include` & `C:\opencv_3.0\opencv\build\x64\vc14\lib`), then open `build\darknet\darknet.sln`, set **x64** and **Release** https://hsto.org/webt/uh/fk/-e/uhfk-eb0q-hwd9hsxhrikbokd6u.jpeg and do the: Build -> Build darknet. Also add Windows system variable `CUDNN` with path to CUDNN: https://user-images.githubusercontent.com/4096485/53249764-019ef880-36ca-11e9-8ffe-d9cf47e7e462.jpg
@@ -596,8 +624,12 @@ With example of: `train.txt`, `obj.names`, `obj.data`, `yolo-obj.cfg`, `air`1-6`
 ## How to use Yolo as DLL and SO libraries
-* on Linux - set `LIBSO=1` in the `Makefile` and do `make`
-* on Windows - compile `build\darknet\yolo_cpp_dll.sln` or `build\darknet\yolo_cpp_dll_no_gpu.sln` solution
+* on Linux
+    * build `darknet` using `cmake` or
+    * set `LIBSO=1` in the `Makefile` and do `make`
+* on Windows
+    * compile `build\darknet\yolo_cpp_dll.sln` solution or
+    * compile `build\darknet\yolo_cpp_dll_no_gpu.sln` solution
 There are 2 APIs:
 * C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h

From fead96a0022eb5b160f87324478d41c60f5166a9 Mon Sep 17 00:00:00 2001
From: acxz
Date: Mon, 10 Jun 2019 13:00:05 -0400
Subject: [PATCH 02/86] fixed link to cmake-gui section

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 97b0bae824c..521c2ab59f4 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ More details: http://pjreddie.com/darknet/yolo/
    * [Using make](#how-to-compile-on-linux-using-make)
 3. How to compile on Windows
    * [Using vcpkg](#how-to-compile-on-windows-using-vcpkg)
-   * [Using Cmake-GUI](#how-to-compile-on-windows-using-Cmake-GUI)
+   * [Using Cmake-GUI](#how-to-compile-on-windows-using-cmake-gui)
    * [Legacy way](#how-to-compile-on-windows-legacy-way)
 4. [How to train (Pascal VOC Data)](#how-to-train-pascal-voc-data)
 5. [How to train with multi-GPU:](#how-to-train-with-multi-gpu)

From de07ab6924913c868e8d4bc1e24a5167e8e20a66 Mon Sep 17 00:00:00 2001
From: acxz
Date: Wed, 12 Jun 2019 12:14:25 -0400
Subject: [PATCH 03/86] added more ways to create SO and DLL files

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 521c2ab59f4..2f85a17c15e 100644
--- a/README.md
+++ b/README.md
@@ -625,11 +625,13 @@ With example of: `train.txt`, `obj.names`, `obj.data`, `yolo-obj.cfg`, `air`1-6`
 ## How to use Yolo as DLL and SO libraries
 * on Linux
+    * using `build.sh` or
    * build `darknet` using `cmake` or
    * set `LIBSO=1` in the `Makefile` and do `make`
 * on Windows
+    * using `build.ps1` or
+    * build `darknet` using `cmake` or
-    * compile `build\darknet\yolo_cpp_dll.sln` solution or
-    * compile `build\darknet\yolo_cpp_dll_no_gpu.sln` solution
+    * compile `build\darknet\yolo_cpp_dll.sln` solution or `build\darknet\yolo_cpp_dll_no_gpu.sln` solution
 There are 2 APIs:
 * C API: https://github.com/AlexeyAB/darknet/blob/master/include/darknet.h

From 993af0fb5b5ec8fe3303d73cbbbcee2938b3051f Mon Sep 17 00:00:00 2001
From: shooorf
Date: Wed, 26 Jun 2019 20:47:09 +0300
Subject: [PATCH 04/86] Check if image does not require resizing

---
 src/image.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/image.c b/src/image.c
index 2f085801dbe..995fa849ddd 100644
--- a/src/image.c
+++ b/src/image.c
@@ -1275,6 +1275,8 @@ float bilinear_interpolate(image im, float x, float y, int c)
 image resize_image(image im, int w, int h)
 {
+    if (im.w == w && im.h == h) return copy_image(im);
+
     image resized = make_image(w, h, im.c);
     image part = make_image(w, im.h, im.c);
     int r, c, k;
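The two added lines short-circuit `resize_image()` when the target size already matches. Note that they return `copy_image(im)` rather than `im` itself, which keeps the ownership contract uniform: the result is always a freshly allocated image that the caller may free independently of the input. A minimal sketch of that calling pattern (illustrative caller code, not part of the patch; it assumes darknet's image API as declared in `src/image.h`):

```
#include <assert.h>
#include "image.h"  /* darknet image API: load_image, resize_image, free_image */

void resize_ownership_demo(void)
{
    image in  = load_image("dog.jpg", 0, 0, 3);  /* any test image */
    image out = resize_image(in, in.w, in.h);    /* hits the new fast path */
    assert(out.data != in.data);                 /* distinct buffer, not an alias */
    free_image(out);                             /* always safe to free both */
    free_image(in);
}
```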
[How to train with multi-GPU:](#how-to-train-with-multi-gpu)

From 8d80a65288df8aac8a7080e17bcdea6136bd186d Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Tue, 27 Aug 2019 14:04:39 +0300
Subject: [PATCH 05/86] Max pool layer can use stride=2 or stride_x=2 stride_y=4 (isn't tested well)

---
 include/darknet.h | 2 ++
 src/box.c | 2 ++
 src/maxpool_layer.c | 30 ++++++++++++++++++------------
 src/maxpool_layer.h | 2 +-
 src/maxpool_layer_kernels.cu | 31 ++++++++++++++++---------------
 src/parser.c | 14 ++++++++------
 6 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index 5d87a8323a4..5cfd274db52 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -208,6 +208,8 @@ struct layer {
     int size;
     int side;
     int stride;
+    int stride_x;
+    int stride_y;
     int dilation;
     int maxpool_depth;
     int out_channels;
diff --git a/src/box.c b/src/box.c
index 640f54a299e..1b5c4998a6b 100644
--- a/src/box.c
+++ b/src/box.c
@@ -207,6 +207,8 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) {
         p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U);
     }
+    // GIoU = I/U - (C-U)/C
+    // C is the smallest convex hull that encloses both Detection and Truth
     if (iou_loss == GIOU) {
         if (C > 0) {
             // apply "C" term from gIOU
diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c
index 4d2ee49f57f..000efe90663 100644
--- a/src/maxpool_layer.c
+++ b/src/maxpool_layer.c
@@ -34,8 +34,8 @@ void cudnn_maxpool_setup(layer *l)
         l->size,
         l->pad/2, //0, //l.pad,
         l->pad/2, //0, //l.pad,
-        l->stride,
-        l->stride);
+        l->stride_x,
+        l->stride_y);
     cudnnCreateTensorDescriptor(&l->srcTensorDesc);
     cudnnCreateTensorDescriptor(&l->dstTensorDesc);
 }
-maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels)
+maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels)
 {
     maxpool_layer l = { (LAYER_TYPE)0 };
     l.type = MAXPOOL;
@@ -62,14 +62,16 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
         l.out_h = l.h;
     }
     else {
-        l.out_w = (w + padding - size) / stride + 1;
-        l.out_h = (h + padding - size) / stride + 1;
+        l.out_w = (w + padding - size) / stride_x + 1;
+        l.out_h = (h + padding - size) / stride_y + 1;
         l.out_c = c;
     }
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = h*w*c;
     l.size = size;
-    l.stride = stride;
+    l.stride = stride_x;
+    l.stride_x = stride_x;
+    l.stride_y = stride_y;
     int output_size = l.out_h * l.out_w * l.out_c * batch;
     l.indexes = (int*)calloc(output_size, sizeof(int));
     l.output = (float*)calloc(output_size, sizeof(float));
@@ -87,7 +89,11 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
 #endif // GPU
     l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
-    fprintf(stderr, "max %d x %d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
+    if(stride_x == stride_y)
+        fprintf(stderr, "max %d x %d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
+    else
+        fprintf(stderr, "max %d x %d/%2dx%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, stride_y, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
+
     return l;
 }
@@ -97,8 +103,8 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
     l->w = w;
     l->inputs = h*w*l->c;
-    l->out_w = (w + l->pad - l->size) / l->stride + 1;
-    l->out_h = (h + l->pad - l->size) / l->stride + 1;
+    l->out_w = (w + l->pad - l->size) / l->stride_x + 1;
+    l->out_h = (h + l->pad - l->size) / l->stride_y + 1;
     l->outputs = l->out_w * l->out_h * l->out_c;
     int output_size = l->outputs * l->batch;
@@ -151,7 +157,7 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state)
     }
-    if (!state.train) {
+    if (!state.train && l.stride_x == l.stride_y) {
         forward_maxpool_layer_avx(state.input, l.output, l.indexes, l.size, l.w, l.h, l.out_w, l.out_h, l.c, l.pad, l.stride, l.batch);
         return;
     }
@@ -173,8 +179,8 @@
     int max_i = -1;
     for(n = 0; n < l.size; ++n){
         for(m = 0; m < l.size; ++m){
-            int cur_h = h_offset + i*l.stride + n;
-            int cur_w = w_offset + j*l.stride + m;
+            int cur_h = h_offset + i*l.stride_y + n;
+            int cur_w = w_offset + j*l.stride_x + m;
             int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c));
             int valid = (cur_h >= 0 && cur_h < l.h && cur_w >= 0 && cur_w < l.w);
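For a quick sanity check of the new output-shape arithmetic (a standalone illustration using the parser's default `padding = size - 1`; the numbers are chosen only for this example):

```
#include <stdio.h>

int main(void)
{
    /* Mirror of the output-size formulas in make_maxpool_layer() above. */
    int w = 416, h = 416, size = 2;
    int padding = size - 1;                           /* parser default = 1 */
    int stride_x = 2, stride_y = 4;
    int out_w = (w + padding - size) / stride_x + 1;  /* 415/2 + 1 = 208 */
    int out_h = (h + padding - size) / stride_y + 1;  /* 415/4 + 1 = 104 */
    printf("out_w=%d out_h=%d\n", out_w, out_h);      /* out_w=208 out_h=104 */
    return 0;
}
```

So an asymmetric `stride_x=2 stride_y=4` pool halves the width but quarters the height, which is exactly the configuration the commit subject describes.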
diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h
index 0c1f6148946..4994d45700d 100644
--- a/src/maxpool_layer.h
+++ b/src/maxpool_layer.h
@@ -12,7 +12,7 @@ typedef layer maxpool_layer;
 extern "C" {
 #endif
 image get_maxpool_image(maxpool_layer l);
-maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels);
+maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels);
 void resize_maxpool_layer(maxpool_layer *l, int w, int h);
 void forward_maxpool_layer(const maxpool_layer l, network_state state);
 void backward_maxpool_layer(const maxpool_layer l, network_state state);
diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu
index 82d631b358a..8e8511003e5 100644
--- 
a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -49,10 +49,10 @@ __global__ void backward_maxpool_depth_layer_kernel(int n, int w, int h, int c, } -__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) +__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *input, float *output, int *indexes) { - int h = (in_h + pad - size) / stride + 1; - int w = (in_w + pad - size) / stride + 1; + int h = (in_h + pad - size) / stride_y + 1; + int w = (in_w + pad - size) / stride_x + 1; int c = in_c; int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; @@ -75,8 +75,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c int l, m; for(l = 0; l < size; ++l){ for(m = 0; m < size; ++m){ - int cur_h = h_offset + i*stride + l; - int cur_w = w_offset + j*stride + m; + int cur_h = h_offset + i*stride_y + l; + int cur_w = w_offset + j*stride_x + m; int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); int valid = (cur_h >= 0 && cur_h < in_h && cur_w >= 0 && cur_w < in_w); @@ -89,12 +89,13 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c indexes[out_index] = max_i; } -__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) +__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *delta, float *prev_delta, int *indexes) { - int h = (in_h + pad - size) / stride + 1; - int w = (in_w + pad - size) / stride + 1; + int h = (in_h + pad - size) / stride_y + 1; + int w = (in_w + pad - size) / stride_x + 1; int c = in_c; - int area = (size-1)/stride; + int area_x = (size - 1) / stride_x; + int area_y = (size - 1) / stride_y; int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if(id >= n) return; @@ -113,10 +114,10 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_ float d = 0; int l, m; - for(l = -area; l < area+1; ++l){ - for(m = -area; m < area+1; ++m){ - int out_w = (j-w_offset)/stride + m; - int out_h = (i-h_offset)/stride + l; + for(l = -area_y; l < area_y+1; ++l){ + for(m = -area_x; m < area_x+1; ++m){ + int out_w = (j-w_offset)/stride_x + m; + int out_h = (i-h_offset)/stride_y + l; int out_index = out_w + w*(out_h + h*(k + c*b)); int valid = (out_w >= 0 && out_w < w && out_h >= 0 && out_h < h); @@ -172,7 +173,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta size_t n = h*w*c*layer.batch; - forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); + forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); CHECK_CUDA(cudaPeekAtLastError()); } @@ -192,6 +193,6 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st size_t n = layer.h*layer.w*layer.c*layer.batch; - backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu); + backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, 
layer.delta_gpu, state.delta, layer.indexes_gpu); CHECK_CUDA(cudaPeekAtLastError()); } diff --git a/src/parser.c b/src/parser.c index 09e79d2df75..ac8f9613fae 100644 --- a/src/parser.c +++ b/src/parser.c @@ -535,6 +535,8 @@ layer parse_reorg_old(list *options, size_params params) maxpool_layer parse_maxpool(list *options, size_params params) { int stride = option_find_int(options, "stride",1); + int stride_x = option_find_int_quiet(options, "stride_x", stride); + int stride_y = option_find_int_quiet(options, "stride_y", stride); int size = option_find_int(options, "size",stride); int padding = option_find_int_quiet(options, "padding", size-1); int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0); @@ -547,7 +549,7 @@ maxpool_layer parse_maxpool(list *options, size_params params) batch=params.batch; if(!(h && w && c)) error("Layer before maxpool layer must output image."); - maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride, padding, maxpool_depth, out_channels); + maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels); return layer; } @@ -1332,12 +1334,12 @@ void load_convolutional_weights(layer l, FILE *fp) //return; } int num = l.nweights; - fread(l.biases, sizeof(float), l.n, fp); + if (fread(l.biases, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n"); //fread(l.weights, sizeof(float), num, fp); // as in connected layer if (l.batch_normalize && (!l.dontloadscales)){ - fread(l.scales, sizeof(float), l.n, fp); - fread(l.rolling_mean, sizeof(float), l.n, fp); - fread(l.rolling_variance, sizeof(float), l.n, fp); + if(fread(l.scales, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n"); + if(fread(l.rolling_mean, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n"); + if(fread(l.rolling_variance, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n"); if(0){ int i; for(i = 0; i < l.n; ++i){ @@ -1354,7 +1356,7 @@ void load_convolutional_weights(layer l, FILE *fp) fill_cpu(l.n, 0, l.rolling_variance, 1); } } - fread(l.weights, sizeof(float), num, fp); + if(fread(l.weights, sizeof(float), num, fp) < num) printf("\n Warning: Unexpected end of wights-file! \n"); //if(l.adam){ // fread(l.m, sizeof(float), num, fp); // fread(l.v, sizeof(float), num, fp); From 4acf924aaf7e61b07b0d8387347674d782e32cce Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 29 Aug 2019 18:30:33 +0300 Subject: [PATCH 06/86] minor fix --- src/parser.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser.c b/src/parser.c index ac8f9613fae..3883d041d4b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1334,12 +1334,12 @@ void load_convolutional_weights(layer l, FILE *fp) //return; } int num = l.nweights; - if (fread(l.biases, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n"); + if (fread(l.biases, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! l.biase - l.index = %d \n", l.index); //fread(l.weights, sizeof(float), num, fp); // as in connected layer if (l.batch_normalize && (!l.dontloadscales)){ - if(fread(l.scales, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n"); - if(fread(l.rolling_mean, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! 
\n"); - if(fread(l.rolling_variance, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! \n"); + if(fread(l.scales, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! l.scales - l.index = %d \n", l.index); + if(fread(l.rolling_mean, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_mean - l.index = %d \n", l.index); + if(fread(l.rolling_variance, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_variance - l.index = %d \n", l.index); if(0){ int i; for(i = 0; i < l.n; ++i){ @@ -1356,7 +1356,7 @@ void load_convolutional_weights(layer l, FILE *fp) fill_cpu(l.n, 0, l.rolling_variance, 1); } } - if(fread(l.weights, sizeof(float), num, fp) < num) printf("\n Warning: Unexpected end of wights-file! \n"); + if(fread(l.weights, sizeof(float), num, fp) < num) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index); //if(l.adam){ // fread(l.m, sizeof(float), num, fp); // fread(l.v, sizeof(float), num, fp); From 58906ef812323444181386c7dd4e6dda8a377a48 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 29 Aug 2019 23:34:04 +0300 Subject: [PATCH 07/86] minor fix: Unexpected end of wights-file! --- src/parser.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/parser.c b/src/parser.c index 3883d041d4b..6cc4790bf32 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1327,6 +1327,12 @@ void load_convolutional_weights_binary(layer l, FILE *fp) #endif } +void check_read_size(size_t read_bytes, int required_bytes) +{ + if (read_bytes > 0 && read_bytes < required_bytes) return 0; + return 1; +} + void load_convolutional_weights(layer l, FILE *fp) { if(l.binary){ @@ -1334,12 +1340,17 @@ void load_convolutional_weights(layer l, FILE *fp) //return; } int num = l.nweights; - if (fread(l.biases, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! l.biase - l.index = %d \n", l.index); + int read_bytes; + read_bytes = fread(l.biases, sizeof(float), l.n, fp); + if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.biases - l.index = %d \n", l.index); //fread(l.weights, sizeof(float), num, fp); // as in connected layer if (l.batch_normalize && (!l.dontloadscales)){ - if(fread(l.scales, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! l.scales - l.index = %d \n", l.index); - if(fread(l.rolling_mean, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_mean - l.index = %d \n", l.index); - if(fread(l.rolling_variance, sizeof(float), l.n, fp) < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_variance - l.index = %d \n", l.index); + read_bytes = fread(l.scales, sizeof(float), l.n, fp); + if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.scales - l.index = %d \n", l.index); + read_bytes = fread(l.rolling_mean, sizeof(float), l.n, fp); + if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_mean - l.index = %d \n", l.index); + read_bytes = fread(l.rolling_variance, sizeof(float), l.n, fp); + if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! 
l.rolling_variance - l.index = %d \n", l.index);
         if(0){
             int i;
             for(i = 0; i < l.n; ++i){
@@ -1356,7 +1367,8 @@ void load_convolutional_weights(layer l, FILE *fp)
             fill_cpu(l.n, 0, l.rolling_variance, 1);
         }
     }
-    if(fread(l.weights, sizeof(float), num, fp) < num) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index);
+    read_bytes = fread(l.weights, sizeof(float), num, fp);
+    if (read_bytes > 0 && read_bytes < num) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index);
     //if(l.adam){
     //    fread(l.m, sizeof(float), num, fp);
     //    fread(l.v, sizeof(float), num, fp);
@@ -1473,8 +1485,9 @@ void load_weights_upto(network *net, char *filename, int cutoff)
         }
 #endif
         }
+        if (feof(fp)) break;
     }
-    fprintf(stderr, "Done!\n");
+    fprintf(stderr, "Done! Loaded %d layers from weights-file \n", i);
     fclose(fp);
 }

From 660a9b225cb30ad06397b5c39e9612d9eff59ea9 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Fri, 30 Aug 2019 00:29:26 +0300
Subject: [PATCH 08/86] compile fix

---
 src/parser.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/parser.c b/src/parser.c
index 6cc4790bf32..48bd42bd3ce 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1327,12 +1327,6 @@ void load_convolutional_weights_binary(layer l, FILE *fp)
 #endif
 }
-void check_read_size(size_t read_bytes, int required_bytes)
-{
-    if (read_bytes > 0 && read_bytes < required_bytes) return 0;
-    return 1;
-}
-
 void load_convolutional_weights(layer l, FILE *fp)
 {
     if(l.binary){

From 2a382e5a4bb55a6cca636f3d7025be2cfbdfc68a Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Fri, 30 Aug 2019 20:28:56 +0300
Subject: [PATCH 09/86] Fix training TridentNet

---
 src/layer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/layer.c b/src/layer.c
index c0b48afc2b8..68d1b35b68a 100644
--- a/src/layer.c
+++ b/src/layer.c
@@ -12,6 +12,7 @@ void free_sublayer(layer *l)
 void free_layer(layer l)
 {
+    if (l.share_layer != NULL) return; // don't free shared layers
     if (l.type == CONV_LSTM) {
         if (l.peephole) {
             free_sublayer(l.vf);

From 102ab710a9ee13c8a160923301d15ae8fcff6188 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sun, 1 Sep 2019 14:11:17 +0300
Subject: [PATCH 10/86] Fixed fuse_conv_batchnorm() for TridentNet

---
 src/convolutional_layer.c | 1 +
 src/network.c | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 6ff5b8b3d4f..93ac79a0711 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -550,6 +550,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     l.bflops = (2.0 * l.nweights * l.out_h*l.out_w) / 1000000000.;
     if (l.xnor && l.use_bin_output) fprintf(stderr, "convXB");
     else if (l.xnor) fprintf(stderr, "convX ");
+    else if(l.share_layer) fprintf(stderr, "convS ");
     else fprintf(stderr, "conv ");
     if(groups > 1) fprintf(stderr, "%5d/%4d ", n, groups);
diff --git a/src/network.c b/src/network.c
index 6e64a8ceb56..9bdab8adce1 100644
--- a/src/network.c
+++ b/src/network.c
@@ -1033,6 +1033,10 @@ void fuse_conv_batchnorm(network net)
         if (l->type == CONVOLUTIONAL) {
             //printf(" Merges Convolutional-%d and batch_norm \n", j);
+            if (l->share_layer != NULL) {
+                l->batch_normalize = 0;
+            }
+
             if (l->batch_normalize) {
                 int f;
                 for (f = 0; f < l->n; ++f)
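For context on what `fuse_conv_batchnorm()` folds, and hence why a shared layer must not be folded: at inference, batch norm after a convolution can be absorbed into the weights and bias with the standard identity `k = scale / sqrt(variance + eps)`, `w' = w * k`, `b' = beta - mean * k` (in darknet's layout the BN shift `beta` is stored in `l.biases`). A minimal per-filter sketch of that identity; this is an illustration, not darknet's actual implementation in `src/network.c`:

```
#include <math.h>

/* Fold one filter's batch-norm statistics into its conv weights and bias.
 * nweights = weights per filter; eps is the small BN stabilizer constant. */
static void fold_bn_one_filter(float *w, int nweights, float *bias,
                               float scale, float mean, float variance,
                               float eps)
{
    float k = scale / sqrtf(variance + eps);
    int i;
    for (i = 0; i < nweights; ++i) w[i] *= k;  /* scale the filter weights */
    *bias = *bias - mean * k;                  /* shift absorbed into the bias */
}
```

Setting `l->batch_normalize = 0` when `l->share_layer != NULL` keeps the fold from being applied to the same shared weight buffer more than once.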
src/conv_lstm_layer.c | 22 +++++++++++-----------
 src/convolutional_kernels.cu | 8 ++++----
 src/convolutional_layer.c | 33 +++++++++++++++++++--------------
 src/convolutional_layer.h | 2 +-
 src/crnn_layer.c | 6 +++---
 src/parser.c | 9 ++++++---
 6 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/src/conv_lstm_layer.c b/src/conv_lstm_layer.c
index 6cbaf1c3911..5da2bab39e0 100644
--- a/src/conv_lstm_layer.c
+++ b/src/conv_lstm_layer.c
@@ -66,44 +66,44 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
     // U
     l.uf = (layer*)calloc(1, sizeof(layer));
-    *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
+    *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.uf->batch = batch;
     if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;

     l.ui = (layer*)calloc(1, sizeof(layer));
-    *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
+    *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.ui->batch = batch;
     if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;

     l.ug = (layer*)calloc(1, sizeof(layer));
-    *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
+    *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.ug->batch = batch;
     if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;

     l.uo = (layer*)calloc(1, sizeof(layer));
-    *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
+    *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.uo->batch = batch;
     if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;

     // W
     l.wf = (layer*)calloc(1, sizeof(layer));
-    *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
+    *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wf->batch = batch;
     if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;

     l.wi = (layer*)calloc(1, sizeof(layer));
-    *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
+    *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wi->batch = batch;
     if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;

     l.wg = (layer*)calloc(1,
sizeof(layer)); - *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.wg->batch = batch; if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; l.wo = (layer*)calloc(1, sizeof(layer)); - *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.wo->batch = batch; if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size; @@ -111,21 +111,21 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // V l.vf = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.vf->batch = batch; if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size; } l.vi = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.vi->batch = batch; if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size; } l.vo = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.vo->batch = batch; if (l.workspace_size < l.vo->workspace_size) l.workspace_size = l.vo->workspace_size; } diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index e404ecabd85..07a0a0d7121 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -177,7 +177,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) fast_binarize_weights_gpu(l.weights_gpu, l.n, (l.c / l.groups)*l.size*l.size, l.binary_weights_gpu, l.mean_arr_gpu); } - if (l.align_bit_weights_gpu && !state.train && l.c >= 32) + if (l.align_bit_weights_gpu && !state.train && l.c >= 32 && l.stride_x == l.stride_y) { //return; //cudaError_t status = cudaSuccess; @@ -574,7 +574,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) l.h, l.w, // input size (h, w) l.size, l.size, // kernel size (h, w) l.pad, l.pad, // padding (h, w) - l.stride, l.stride, // stride 
(h, w) + l.stride_y, l.stride_x, // stride (h, w) l.dilation, l.dilation, // dilation (h, w) state.workspace); // output @@ -819,7 +819,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state l.h, l.w, // input size (h, w) l.size, l.size, // kernel size (h, w) l.pad, l.pad, // padding (h, w) - l.stride, l.stride, // stride (h, w) + l.stride_y, l.stride_x, // stride (h, w) l.dilation, l.dilation, // dilation (h, w) state.workspace); // output //gemm_ongpu(0, 1, m, n, k, 1, a + i*m*k, k, b, k, 1, c, n); @@ -844,7 +844,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state l.h, l.w, // input size (h, w) l.size, l.size, // kernel size (h, w) l.pad, l.pad, // padding size (h, w) - l.stride, l.stride, // stride size (h, w) + l.stride_y, l.stride_x, // stride size (h, w) l.dilation, l.dilation, // dilation size (h, w) delta); // output (delta) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 93ac79a0711..207e3f27cf6 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -76,12 +76,12 @@ void binarize_input(float *input, int n, int size, float *binary) int convolutional_out_height(convolutional_layer l) { - return (l.h + 2*l.pad - l.size) / l.stride + 1; + return (l.h + 2*l.pad - l.size) / l.stride_y + 1; } int convolutional_out_width(convolutional_layer l) { - return (l.w + 2*l.pad - l.size) / l.stride + 1; + return (l.w + 2*l.pad - l.size) / l.stride_x + 1; } image get_convolutional_image(convolutional_layer l) @@ -276,9 +276,9 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) //printf("\n l->dilation = %d, l->pad = %d, l->size = %d \n", l->dilation, l->pad, l->size); #if(CUDNN_MAJOR >= 6) - CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad * l->dilation, l->pad* l->dilation, l->stride, l->stride, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT)); // cudnn >= 6.0 + CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad * l->dilation, l->pad* l->dilation, l->stride_y, l->stride_x, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT)); // cudnn >= 6.0 #else - CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad * l->dilation, l->pad * l->dilation, l->stride, l->stride, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION)); // cudnn 5.1 + CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad * l->dilation, l->pad * l->dilation, l->stride_y, l->stride_x, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION)); // cudnn 5.1 #endif int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST; int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST; @@ -332,7 +332,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) #endif #endif -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer) +convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer) { int total_batch = batch*steps; int i; @@ -354,7 +354,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.use_bin_output = 
use_bin_output; l.batch = batch; l.steps = steps; - l.stride = stride; + l.stride = stride_x; + l.stride_y = stride_x; + l.stride_x = stride_y; l.dilation = dilation; l.size = size; l.pad = padding; @@ -553,11 +555,14 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, else if(l.share_layer) fprintf(stderr, "convS "); else fprintf(stderr, "conv "); - if(groups > 1) fprintf(stderr, "%5d/%4d ", n, groups); + if (groups > 1) fprintf(stderr, "%5d/%4d ", n, groups); else fprintf(stderr, "%5d ", n); - if(dilation > 1) fprintf(stderr, "%2d x%2d/%2d(%1d)", size, size, stride, dilation); - else fprintf(stderr, "%2d x%2d/%2d ", size, size, stride); + if (stride_x != stride_y) fprintf(stderr, "%2d x%2d/%2dx%2d ", size, size, stride_x, stride_y); + else { + if (dilation > 1) fprintf(stderr, "%2d x%2d/%2d(%1d)", size, size, stride_x, dilation); + else fprintf(stderr, "%2d x%2d/%2d ", size, size, stride_x); + } fprintf(stderr, "%4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); @@ -583,7 +588,7 @@ void denormalize_convolutional_layer(convolutional_layer l) void test_convolutional_layer() { - convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, NULL); + convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, NULL); l.batch_normalize = 1; float data[] = {1,1,1,1,1, 1,1,1,1,1, @@ -921,7 +926,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) //gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); //gemm_nn_custom(m, n, k, 1, a, k, b, n, c, n); - if (l.xnor && l.align_bit_weights && !state.train) + if (l.xnor && l.align_bit_weights && !state.train && l.stride_x == l.stride_y) { memset(b, 0, l.bit_align*l.size*l.size*l.c * sizeof(float)); @@ -1053,7 +1058,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) l.h, l.w, // input size (h, w) l.size, l.size, // kernel size (h, w) l.pad, l.pad, // padding (h, w) - l.stride, l.stride, // stride (h, w) + l.stride_y, l.stride_x, // stride (h, w) l.dilation, l.dilation, // dilation (h, w) b); // output @@ -1229,7 +1234,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state) l.h, l.w, // input size (h, w) l.size, l.size, // kernel size (h, w) l.pad, l.pad, // padding (h, w) - l.stride, l.stride, // stride (h, w) + l.stride_y, l.stride_x, // stride (h, w) l.dilation, l.dilation, // dilation (h, w) b); // output @@ -1251,7 +1256,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state) l.h, l.w, // input size (h, w) l.size, l.size, // kernel size (h, w) l.pad, l.pad, // padding (h, w) - l.stride, l.stride, // stride (h, w) + l.stride_y, l.stride_x, // stride (h, w) l.dilation, l.dilation, // dilation (h, w) state.delta + (i*l.groups + j)* (l.c / l.groups)*l.h*l.w); // output (delta) } diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index e62b155c45f..1167175ccf4 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -30,7 +30,7 @@ void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); #endif size_t get_convolutional_workspace_size(layer l); -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer); 
+convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer); void denormalize_convolutional_layer(convolutional_layer l); void resize_convolutional_layer(convolutional_layer *layer, int w, int h); void forward_convolutional_layer(const convolutional_layer layer, network_state state); diff --git a/src/crnn_layer.c b/src/crnn_layer.c index 7609003b4f2..eaded279099 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -50,17 +50,17 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou l.state = (float*)calloc(l.hidden * l.batch * (l.steps + 1), sizeof(float)); l.input_layer = (layer*)calloc(1, sizeof(layer)); - *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.input_layer->batch = batch; if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; l.self_layer = (layer*)calloc(1, sizeof(layer)); - *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.self_layer->batch = batch; if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size; l.output_layer = (layer*)calloc(1, sizeof(layer)); - *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); l.output_layer->batch = batch; if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; diff --git a/src/parser.c b/src/parser.c index 48bd42bd3ce..8283f7ed0f0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -158,6 +158,8 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo int groups = option_find_int_quiet(options, "groups", 1); int size = option_find_int(options, "size",1); int stride = option_find_int(options, "stride",1); + int stride_x = option_find_int_quiet(options, "stride_x", stride); + int stride_y = option_find_int_quiet(options, "stride_y", stride); int dilation = option_find_int_quiet(options, "dilation", 1); if (size == 1) dilation = 1; int pad = option_find_int_quiet(options, "pad",0); @@ -167,9 +169,10 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo char *activation_s = option_find_str(options, "activation", "logistic"); ACTIVATION activation = get_activation(activation_s); - int share_index = option_find_int_quiet(options, "share_index", -1); + int share_index = option_find_int_quiet(options, "share_index", 
-1000000000); convolutional_layer *share_layer = NULL; - if(share_index > -1) share_layer = &net.layers[share_index]; + if(share_index >= 0) share_layer = &net.layers[share_index]; + else if(share_index != -1000000000) share_layer = &net.layers[params.index + share_index]; int batch,h,w,c; h = params.h; @@ -182,7 +185,7 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo int xnor = option_find_int_quiet(options, "xnor", 0); int use_bin_output = option_find_int_quiet(options, "bin_output", 0); - convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, share_layer); + convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, share_layer); layer.flipped = option_find_int_quiet(options, "flipped", 0); layer.dot = option_find_float_quiet(options, "dot", 0); layer.assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0); From 11b8b2bddf32190a83dbf7868881520b1e564377 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 2 Sep 2019 02:13:11 +0300 Subject: [PATCH 12/86] Added antialiasing=1 param for [convolutional]-layer on GPU --- include/darknet.h | 2 ++ src/conv_lstm_layer.c | 22 +++++++-------- src/convolutional_kernels.cu | 24 +++++++++++++++++ src/convolutional_layer.c | 52 ++++++++++++++++++++++++++++++++++-- src/convolutional_layer.h | 2 +- src/crnn_layer.c | 6 ++--- src/layer.c | 4 +++ src/parser.c | 19 +++++++++---- 8 files changed, 109 insertions(+), 22 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index 5cfd274db52..a7a62b47bd7 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -211,6 +211,7 @@ struct layer { int stride_x; int stride_y; int dilation; + int antialiasing; int maxpool_depth; int out_channels; int reverse; @@ -528,6 +529,7 @@ struct layer { float * scale_updates_gpu; float * scale_change_gpu; + float * input_antialiasing_gpu; float * output_gpu; float * output_sigmoid_gpu; float * loss_gpu; diff --git a/src/conv_lstm_layer.c b/src/conv_lstm_layer.c index 5da2bab39e0..a6da3bf0c2c 100644 --- a/src/conv_lstm_layer.c +++ b/src/conv_lstm_layer.c @@ -66,44 +66,44 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // U l.uf = (layer*)calloc(1, sizeof(layer)); - *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.uf->batch = batch; if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; l.ui = (layer*)calloc(1, sizeof(layer)); - *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.ui->batch = batch; if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; l.ug = (layer*)calloc(1, sizeof(layer)); - *(l.ug) = make_convolutional_layer(batch, 
steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.ug->batch = batch; if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size; l.uo = (layer*)calloc(1, sizeof(layer)); - *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.uo->batch = batch; if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size; // W l.wf = (layer*)calloc(1, sizeof(layer)); - *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.wf->batch = batch; if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; l.wi = (layer*)calloc(1, sizeof(layer)); - *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.wi->batch = batch; if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; l.wg = (layer*)calloc(1, sizeof(layer)); - *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.wg->batch = batch; if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; l.wo = (layer*)calloc(1, sizeof(layer)); - *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.wo->batch = batch; if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size; @@ -111,21 +111,21 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // V l.vf = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, 
xnor, 0, 0, 0, 0, NULL); l.vf->batch = batch; if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size; } l.vi = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.vi->batch = batch; if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size; } l.vo = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.vo->batch = batch; if (l.workspace_size < l.vo->workspace_size) l.workspace_size = l.vo->workspace_size; } diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 07a0a0d7121..b476ac76e3d 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -604,10 +604,34 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) if (state.net.try_fix_nan) { fix_nan_and_inf(l.output_gpu, l.outputs*l.batch); } + + if (l.antialiasing) { + network_state s = { 0 }; + s.train = state.train; + s.workspace = state.workspace; + s.net = state.net; + if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) + s.input = l.output_gpu; + forward_convolutional_layer_gpu(*(l.input_layer), s); + simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.input_antialiasing_gpu); + simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.input_layer->output_gpu, l.output_gpu); + } } void backward_convolutional_layer_gpu(convolutional_layer l, network_state state) { + if (l.antialiasing) { + network_state s = { 0 }; + s.train = state.train; + s.workspace = state.workspace; + s.net = state.net; + s.delta = l.delta_gpu; + s.input = l.input_antialiasing_gpu; + //if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) + simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.delta_gpu, l.input_layer->delta_gpu); + backward_convolutional_layer_gpu(*(l.input_layer), s); + } + if(state.net.try_fix_nan) constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.output_sigmoid_gpu, l.delta_gpu); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 207e3f27cf6..11402721545 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -332,7 +332,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) #endif #endif -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer) +convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, 
int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer) { int total_batch = batch*steps; int i; @@ -342,6 +342,13 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, if (xnor) groups = 1; // disable groups for XNOR-net if (groups < 1) groups = 1; + const int blur_stride_x = stride_x; + const int blur_stride_y = stride_y; + l.antialiasing = antialiasing; + if (antialiasing) { + stride_x = stride_y = l.stride = l.stride_x = l.stride_y = 1; // use stride=1 in host-layer + } + l.share_layer = share_layer; l.index = index; l.h = h; @@ -568,6 +575,47 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, //fprintf(stderr, "%5d/%2d %2d x%2d /%2d(%d)%4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, groups, size, size, stride, dilation, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + if (l.antialiasing) { + printf("AA: "); + l.input_layer = (layer*)calloc(1, sizeof(layer)); + const int blur_size = 3; + *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL); + const int blur_nweights = n * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; + int i; + for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { + /* + l.input_layer->weights[i + 0] = 0; + l.input_layer->weights[i + 1] = 0; + l.input_layer->weights[i + 2] = 0; + + l.input_layer->weights[i + 3] = 0; + l.input_layer->weights[i + 4] = 1; + l.input_layer->weights[i + 5] = 0; + + l.input_layer->weights[i + 6] = 0; + l.input_layer->weights[i + 7] = 0; + l.input_layer->weights[i + 8] = 0; + */ + l.input_layer->weights[i + 0] = 1 / 16.f; + l.input_layer->weights[i + 1] = 2 / 16.f; + l.input_layer->weights[i + 2] = 1 / 16.f; + + l.input_layer->weights[i + 3] = 2 / 16.f; + l.input_layer->weights[i + 4] = 4 / 16.f; + l.input_layer->weights[i + 5] = 2 / 16.f; + + l.input_layer->weights[i + 6] = 1 / 16.f; + l.input_layer->weights[i + 7] = 2 / 16.f; + l.input_layer->weights[i + 8] = 1 / 16.f; + + } + for (i = 0; i < n; ++i) l.input_layer->biases[i] = 0; +#ifdef GPU + l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs); + push_convolutional_layer(*(l.input_layer)); +#endif // GPU + } + return l; } @@ -588,7 +636,7 @@ void denormalize_convolutional_layer(convolutional_layer l) void test_convolutional_layer() { - convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, NULL); + convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, 0, NULL); l.batch_normalize = 1; float data[] = {1,1,1,1,1, 1,1,1,1,1, diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index 1167175ccf4..1012663a5b3 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -30,7 +30,7 @@ void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); #endif size_t get_convolutional_workspace_size(layer l); -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, convolutional_layer *share_layer); +convolutional_layer 
make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer); void denormalize_convolutional_layer(convolutional_layer l); void resize_convolutional_layer(convolutional_layer *layer, int w, int h); void forward_convolutional_layer(const convolutional_layer layer, network_state state); diff --git a/src/crnn_layer.c b/src/crnn_layer.c index eaded279099..e3114fc9497 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -50,17 +50,17 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou l.state = (float*)calloc(l.hidden * l.batch * (l.steps + 1), sizeof(float)); l.input_layer = (layer*)calloc(1, sizeof(layer)); - *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.input_layer->batch = batch; if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; l.self_layer = (layer*)calloc(1, sizeof(layer)); - *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.self_layer->batch = batch; if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size; l.output_layer = (layer*)calloc(1, sizeof(layer)); - *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL); + *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); l.output_layer->batch = batch; if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; diff --git a/src/layer.c b/src/layer.c index 68d1b35b68a..b6ae95dba12 100644 --- a/src/layer.c +++ b/src/layer.c @@ -13,6 +13,9 @@ void free_sublayer(layer *l) void free_layer(layer l) { if (l.share_layer != NULL) return; // don't free shared layers + if (l.antialiasing) { + free_sublayer(l.input_layer); + } if (l.type == CONV_LSTM) { if (l.peephole) { free_sublayer(l.vf); @@ -167,6 +170,7 @@ void free_layer(layer l) if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu), l.bias_updates_gpu = NULL; if (l.scales_gpu) cuda_free(l.scales_gpu), l.scales_gpu = NULL; if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu), l.scale_updates_gpu = NULL; + if (l.input_antialiasing_gpu) cuda_free(l.input_antialiasing_gpu), l.input_antialiasing_gpu = NULL; if (l.output_gpu) cuda_free(l.output_gpu), l.output_gpu = NULL; if (l.output_sigmoid_gpu) cuda_free(l.output_sigmoid_gpu), l.output_sigmoid_gpu = NULL; if (l.delta_gpu) cuda_free(l.delta_gpu), l.delta_gpu = NULL; diff --git 
a/src/parser.c b/src/parser.c index 8283f7ed0f0..fda2bacc041 100644 --- a/src/parser.c +++ b/src/parser.c @@ -161,6 +161,7 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo int stride_x = option_find_int_quiet(options, "stride_x", stride); int stride_y = option_find_int_quiet(options, "stride_y", stride); int dilation = option_find_int_quiet(options, "dilation", 1); + int antialiasing = option_find_int_quiet(options, "antialiasing", 0); if (size == 1) dilation = 1; int pad = option_find_int_quiet(options, "pad",0); int padding = option_find_int_quiet(options, "padding",0); @@ -185,7 +186,7 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo int xnor = option_find_int_quiet(options, "xnor", 0); int use_bin_output = option_find_int_quiet(options, "bin_output", 0); - convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, share_layer); + convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer); layer.flipped = option_find_int_quiet(options, "flipped", 0); layer.dot = option_find_float_quiet(options, "dot", 0); layer.assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0); @@ -991,10 +992,18 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) n = n->next; ++count; if(n){ - params.h = l.out_h; - params.w = l.out_w; - params.c = l.out_c; - params.inputs = l.outputs; + if (l.antialiasing) { + params.h = l.input_layer->out_h; + params.w = l.input_layer->out_w; + params.c = l.input_layer->out_c; + params.inputs = l.input_layer->outputs; + } + else { + params.h = l.out_h; + params.w = l.out_w; + params.c = l.out_c; + params.inputs = l.outputs; + } } if (l.bflops > 0) bflops += l.bflops; } From 80ceee4fca9fd44082e587ecb70c45cbc8e26dca Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 2 Sep 2019 14:24:54 +0300 Subject: [PATCH 13/86] Added antialiasing=1 param for [convolutional]-layer on CPU (only forward inference) --- src/convolutional_layer.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 11402721545..10c1f324188 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -1135,6 +1135,18 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) //wait_until_press_key_cv(); if(l.assisted_excitation && state.train) assisted_excitation_forward(l, state); + + if (l.antialiasing) { + network_state s = { 0 }; + s.train = state.train; + s.workspace = state.workspace; + s.net = state.net; + if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) + s.input = l.output; + forward_convolutional_layer(*(l.input_layer), s); + //simple_copy_ongpu(l.outputs*l.batch, l.output, l.input_antialiasing); + memcpy(l.output, l.input_layer->output, l.input_layer->outputs * l.input_layer->batch * sizeof(float)); + } } static box float_to_box_stride(float *f, int stride) From 9e26472b1ac245a87289b1a90af3007cd937d94b Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 2 Sep 2019 15:25:42 +0300 Subject: [PATCH 14/86] Added antialiasing=1 param for [maxpool]-layer on GPU and CPU --- src/convolutional_layer.c | 3 +- 
src/maxpool_layer.c | 115 +++++++++++++++++++++++++++-------- src/maxpool_layer.h | 2 +- src/maxpool_layer_kernels.cu | 41 ++++++++++--- src/parser.c | 3 +- 5 files changed, 126 insertions(+), 38 deletions(-) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 10c1f324188..c5c59576349 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -576,7 +576,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, //fprintf(stderr, "%5d/%2d %2d x%2d /%2d(%d)%4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, groups, size, size, stride, dilation, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); if (l.antialiasing) { - printf("AA: "); + printf("AA: "); l.input_layer = (layer*)calloc(1, sizeof(layer)); const int blur_size = 3; *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL); @@ -1141,7 +1141,6 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) s.train = state.train; s.workspace = state.workspace; s.net = state.net; - if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) s.input = l.output; forward_convolutional_layer(*(l.input_layer), s); //simple_copy_ongpu(l.outputs*l.batch, l.output, l.input_antialiasing); diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 000efe90663..1239262197d 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -1,4 +1,5 @@ #include "maxpool_layer.h" +#include "convolutional_layer.h" #include "dark_cuda.h" #include "gemm.h" #include @@ -45,10 +46,18 @@ void cudnn_maxpool_setup(layer *l) } -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels) +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels, int antialiasing) { maxpool_layer l = { (LAYER_TYPE)0 }; l.type = MAXPOOL; + + const int blur_stride_x = stride_x; + const int blur_stride_y = stride_y; + l.antialiasing = antialiasing; + if (antialiasing) { + stride_x = stride_y = l.stride = l.stride_x = l.stride_y = 1; // use stride=1 in host-layer + } + l.batch = batch; l.h = h; l.w = w; @@ -94,6 +103,46 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s else fprintf(stderr, "max %d x %d/%2dx%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, stride_y, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + if (l.antialiasing) { + printf("AA: "); + l.input_layer = (layer*)calloc(1, sizeof(layer)); + const int blur_size = 3; + *(l.input_layer) = make_convolutional_layer(batch, 1, l.out_h, l.out_w, l.out_c, l.out_c, l.out_c, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, 1, 0, NULL); + const int blur_nweights = l.out_c * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; + int i; + for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { + /* + l.input_layer->weights[i + 0] = 0; + l.input_layer->weights[i + 1] = 0; + l.input_layer->weights[i + 2] = 0; + + l.input_layer->weights[i + 3] = 0; + l.input_layer->weights[i + 4] = 1; + l.input_layer->weights[i + 5] = 0; + + l.input_layer->weights[i + 6] = 0; + l.input_layer->weights[i + 7] = 0; + l.input_layer->weights[i + 8] = 0; + */ + l.input_layer->weights[i + 0] = 1 / 16.f; + 
l.input_layer->weights[i + 1] = 2 / 16.f; + l.input_layer->weights[i + 2] = 1 / 16.f; + + l.input_layer->weights[i + 3] = 2 / 16.f; + l.input_layer->weights[i + 4] = 4 / 16.f; + l.input_layer->weights[i + 5] = 2 / 16.f; + + l.input_layer->weights[i + 6] = 1 / 16.f; + l.input_layer->weights[i + 7] = 2 / 16.f; + l.input_layer->weights[i + 8] = 1 / 16.f; + } + for (i = 0; i < l.out_c; ++i) l.input_layer->biases[i] = 0; +#ifdef GPU + l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs); + push_convolutional_layer(*(l.input_layer)); +#endif // GPU + } + return l; } @@ -159,42 +208,54 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state) if (!state.train && l.stride_x == l.stride_y) { forward_maxpool_layer_avx(state.input, l.output, l.indexes, l.size, l.w, l.h, l.out_w, l.out_h, l.c, l.pad, l.stride, l.batch); - return; } + else { - int b,i,j,k,m,n; - int w_offset = -l.pad / 2; - int h_offset = -l.pad / 2; + int b, i, j, k, m, n; + int w_offset = -l.pad / 2; + int h_offset = -l.pad / 2; - int h = l.out_h; - int w = l.out_w; - int c = l.c; + int h = l.out_h; + int w = l.out_w; + int c = l.c; - for(b = 0; b < l.batch; ++b){ - for(k = 0; k < c; ++k){ - for(i = 0; i < h; ++i){ - for(j = 0; j < w; ++j){ - int out_index = j + w*(i + h*(k + c*b)); - float max = -FLT_MAX; - int max_i = -1; - for(n = 0; n < l.size; ++n){ - for(m = 0; m < l.size; ++m){ - int cur_h = h_offset + i*l.stride_y + n; - int cur_w = w_offset + j*l.stride_x + m; - int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); - int valid = (cur_h >= 0 && cur_h < l.h && - cur_w >= 0 && cur_w < l.w); - float val = (valid != 0) ? state.input[index] : -FLT_MAX; - max_i = (val > max) ? index : max_i; - max = (val > max) ? val : max; + for (b = 0; b < l.batch; ++b) { + for (k = 0; k < c; ++k) { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int out_index = j + w*(i + h*(k + c*b)); + float max = -FLT_MAX; + int max_i = -1; + for (n = 0; n < l.size; ++n) { + for (m = 0; m < l.size; ++m) { + int cur_h = h_offset + i*l.stride_y + n; + int cur_w = w_offset + j*l.stride_x + m; + int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); + int valid = (cur_h >= 0 && cur_h < l.h && + cur_w >= 0 && cur_w < l.w); + float val = (valid != 0) ? state.input[index] : -FLT_MAX; + max_i = (val > max) ? index : max_i; + max = (val > max) ? 
val : max; + } } + l.output[out_index] = max; + l.indexes[out_index] = max_i; } } } } + + if (l.antialiasing) { + network_state s = { 0 }; + s.train = state.train; + s.workspace = state.workspace; + s.net = state.net; + s.input = l.output; + forward_convolutional_layer(*(l.input_layer), s); + //simple_copy_ongpu(l.outputs*l.batch, l.output, l.input_antialiasing); + memcpy(l.output, l.input_layer->output, l.input_layer->outputs * l.input_layer->batch * sizeof(float)); + } } void backward_maxpool_layer(const maxpool_layer l, network_state state) diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index 4994d45700d..cfedf9d9ee6 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -12,7 +12,7 @@ typedef layer maxpool_layer; extern "C" { #endif image get_maxpool_image(maxpool_layer l); -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels, int antialiasing); void resize_maxpool_layer(maxpool_layer *l, int w, int h); void forward_maxpool_layer(const maxpool_layer l, network_state state); void backward_maxpool_layer(const maxpool_layer l, network_state state); diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index 8e8511003e5..cc546a0b50c 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -3,6 +3,8 @@ #include #include "maxpool_layer.h" +#include "convolutional_layer.h" +#include "blas.h" #include "dark_cuda.h" __global__ void forward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int out_c, int batch, float *input, float *output, int *indexes) @@ -163,22 +165,47 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta //cudnnDestroyTensorDescriptor(layer.srcTensorDesc); //cudnnDestroyTensorDescriptor(layer.dstTensorDesc); - return; } + else #endif + { + int h = layer.out_h; + int w = layer.out_w; + int c = layer.out_c; - int h = layer.out_h; - int w = layer.out_w; - int c = layer.out_c; + size_t n = h*w*c*layer.batch; - size_t n = h*w*c*layer.batch; + forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream()>>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); + CHECK_CUDA(cudaPeekAtLastError()); + } - forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream()>>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); - CHECK_CUDA(cudaPeekAtLastError()); + if (layer.antialiasing) { + network_state s = { 0 }; + s.train = state.train; + s.workspace = state.workspace; + s.net = state.net; + if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) + s.input = layer.output_gpu; + forward_convolutional_layer_gpu(*(layer.input_layer), s); + simple_copy_ongpu(layer.outputs*layer.batch, layer.output_gpu, layer.input_antialiasing_gpu); + simple_copy_ongpu(layer.input_layer->outputs*layer.input_layer->batch, layer.input_layer->output_gpu, layer.output_gpu); + } } extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state) { + if (layer.antialiasing) { + network_state s = { 0 }; + s.train = state.train; + s.workspace = state.workspace; + s.net = state.net; + s.delta = layer.delta_gpu; + s.input = 
layer.input_antialiasing_gpu; + //if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) + simple_copy_ongpu(layer.input_layer->outputs*layer.input_layer->batch, layer.delta_gpu, layer.input_layer->delta_gpu); + backward_convolutional_layer_gpu(*(layer.input_layer), s); + } + if (layer.maxpool_depth) { int h = layer.out_h; int w = layer.out_w; diff --git a/src/parser.c b/src/parser.c index fda2bacc041..b89bf0acc7b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -545,6 +545,7 @@ maxpool_layer parse_maxpool(list *options, size_params params) int padding = option_find_int_quiet(options, "padding", size-1); int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0); int out_channels = option_find_int_quiet(options, "out_channels", 1); + int antialiasing = option_find_int_quiet(options, "antialiasing", 0); int batch,h,w,c; h = params.h; @@ -553,7 +554,7 @@ maxpool_layer parse_maxpool(list *options, size_params params) batch=params.batch; if(!(h && w && c)) error("Layer before maxpool layer must output image."); - maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels); + maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing); return layer; } From a63782ca8937f412e943b6c841a5671ce39c60fc Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 2 Sep 2019 15:55:05 +0300 Subject: [PATCH 15/86] Added: efficientnet_b0.cfg --- build/darknet/x64/cfg/efficientnet_b0.cfg | 1005 +++++++++++++++++++++ cfg/efficientnet_b0.cfg | 1005 +++++++++++++++++++++ 2 files changed, 2010 insertions(+) create mode 100644 build/darknet/x64/cfg/efficientnet_b0.cfg create mode 100644 cfg/efficientnet_b0.cfg diff --git a/build/darknet/x64/cfg/efficientnet_b0.cfg b/build/darknet/x64/cfg/efficientnet_b0.cfg new file mode 100644 index 00000000000..3bd3e895bc1 --- /dev/null +++ b/build/darknet/x64/cfg/efficientnet_b0.cfg @@ -0,0 +1,1005 @@ +[net] +# Training +batch=120 +subdivisions=4 +# Testing +#batch=1 +#subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +burn_in=1000 +#burn_in=100 +learning_rate=0.256 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.00005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +### CONV1 - 1 (1) +# conv1 +[convolutional] +filters=32 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +### CONV2 - MBConv1 - 1 (1) +# conv2_1_expand +[convolutional] +filters=32 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv2_1_dwise +[convolutional] +groups=32 +filters=32 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=4 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=32 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv2_1_linear +[convolutional] +filters=16 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV3 - MBConv6 - 1 (2) +# conv2_2_expand +[convolutional] +filters=96 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv2_2_dwise +[convolutional] +groups=96 +filters=96 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=8 (recommended r=16) +[convolutional] +filters=16 +size=1 
+stride=1 +activation=swish + +# excitation +[convolutional] +filters=96 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv2_2_linear +[convolutional] +filters=24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 2 (2) +# conv3_1_expand +[convolutional] +filters=144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv3_1_dwise +[convolutional] +groups=144 +filters=144 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=144 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv3_1_linear +[convolutional] +filters=24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV4 - MBConv6 - 1 (2) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_3_1 +[shortcut] +from=-9 +activation=linear + +# conv_3_2_expand +[convolutional] +filters=144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_3_2_dwise +[convolutional] +groups=144 +filters=144 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=144 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_3_2_linear +[convolutional] +filters=40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV4 - MBConv6 - 2 (2) +# conv_4_1_expand +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_1_dwise +[convolutional] +groups=192 +filters=192 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=192 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_1_linear +[convolutional] +filters=40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + + +### CONV5 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_4_2 +[shortcut] +from=-9 +activation=linear + +# conv_4_3_expand +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_3_dwise +[convolutional] +groups=192 +filters=192 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=192 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_3_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 2 (3) +# conv_4_4_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_4_dwise +[convolutional] +groups=384 +filters=384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] 
+filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_4_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_4_4 +[shortcut] +from=-9 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_5_dwise +[convolutional] +groups=384 +filters=384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_5_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV6 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_4_6 +[shortcut] +from=-9 +activation=linear + +# conv_4_7_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_7_dwise +[convolutional] +groups=384 +filters=384 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_7_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 2 (3) +# conv_5_1_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_1_dwise +[convolutional] +groups=576 +filters=576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_1_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_5_1 +[shortcut] +from=-9 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_2_dwise +[convolutional] +groups=576 +filters=576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_2_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 1 (4) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_5_2 +[shortcut] +from=-9 +activation=linear + +# conv_5_3_expand +[convolutional] +filters=576 
+size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_3_dwise +[convolutional] +groups=576 +filters=576 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_3_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 2 (4) +# conv_6_1_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_1_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_1_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 3 (4) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_6_1 +[shortcut] +from=-9 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_2_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_2_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 4 (4) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_6_1 +[shortcut] +from=-9 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_2_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_2_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV8 - MBConv6 - 1 (1) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_6_2 +[shortcut] +from=-9 +activation=linear + +# conv_6_3_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_3_dwise +[convolutional] +groups=960 +filters=960 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_3_linear +[convolutional] 
+filters=320 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV9 - Conv2d 1x1 +# conv_6_4 +[convolutional] +filters=1280 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + + +[avgpool] + +[dropout] +probability=.2 + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=0 +activation=linear + +[softmax] +groups=1 + +#[cost] +#type=sse + diff --git a/cfg/efficientnet_b0.cfg b/cfg/efficientnet_b0.cfg new file mode 100644 index 00000000000..3bd3e895bc1 --- /dev/null +++ b/cfg/efficientnet_b0.cfg @@ -0,0 +1,1005 @@ +[net] +# Training +batch=120 +subdivisions=4 +# Testing +#batch=1 +#subdivisions=1 +height=224 +width=224 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=256 + +burn_in=1000 +#burn_in=100 +learning_rate=0.256 +policy=poly +power=4 +max_batches=800000 +momentum=0.9 +decay=0.00005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +### CONV1 - 1 (1) +# conv1 +[convolutional] +filters=32 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +### CONV2 - MBConv1 - 1 (1) +# conv2_1_expand +[convolutional] +filters=32 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv2_1_dwise +[convolutional] +groups=32 +filters=32 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=4 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=32 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv2_1_linear +[convolutional] +filters=16 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV3 - MBConv6 - 1 (2) +# conv2_2_expand +[convolutional] +filters=96 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv2_2_dwise +[convolutional] +groups=96 +filters=96 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=8 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=96 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv2_2_linear +[convolutional] +filters=24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 2 (2) +# conv3_1_expand +[convolutional] +filters=144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv3_1_dwise +[convolutional] +groups=144 +filters=144 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=144 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv3_1_linear +[convolutional] +filters=24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV4 - MBConv6 - 1 (2) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_3_1 +[shortcut] +from=-9 +activation=linear + +# conv_3_2_expand +[convolutional] +filters=144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_3_2_dwise +[convolutional] +groups=144 +filters=144 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation 
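+# note: this excitation conv restores the channel count (8 -> 144 here); its +# logistic output is one gate in [0,1] per channel, and the [scale_channels] +# from=-4 below multiplies those gates into the depthwise conv output saved +# four layers back, completing the squeeze-and-excitation sub-block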
+[convolutional] +filters=144 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_3_2_linear +[convolutional] +filters=40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV4 - MBConv6 - 2 (2) +# conv_4_1_expand +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_1_dwise +[convolutional] +groups=192 +filters=192 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=192 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_1_linear +[convolutional] +filters=40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + + +### CONV5 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_4_2 +[shortcut] +from=-9 +activation=linear + +# conv_4_3_expand +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_3_dwise +[convolutional] +groups=192 +filters=192 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=192 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_3_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 2 (3) +# conv_4_4_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_4_dwise +[convolutional] +groups=384 +filters=384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_4_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_4_4 +[shortcut] +from=-9 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_5_dwise +[convolutional] +groups=384 +filters=384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_5_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV6 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_4_6 +[shortcut] +from=-9 +activation=linear + +# conv_4_7_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_7_dwise +[convolutional] +groups=384 +filters=384 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + 
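+# note: every MBConv6 block repeats the same pattern: 1x1 expand conv -> depthwise +# conv (groups equals filters; the stride=2 variants downsample) -> squeeze-n-excitation +# -> linear 1x1 projection; where input and output shapes match, [dropout] plus +# [shortcut] from=-9 adds the previous block's linear projection as a residual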
+#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_7_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 2 (3) +# conv_5_1_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_1_dwise +[convolutional] +groups=576 +filters=576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_1_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_5_1 +[shortcut] +from=-9 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_2_dwise +[convolutional] +groups=576 +filters=576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_2_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 1 (4) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_5_2 +[shortcut] +from=-9 +activation=linear + +# conv_5_3_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_3_dwise +[convolutional] +groups=576 +filters=576 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_3_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 2 (4) +# conv_6_1_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_1_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_1_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 3 (4) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_6_1 +[shortcut] +from=-9 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 
+batch_normalize=1 +activation=swish + +# conv_6_2_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_2_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 4 (4) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_6_1 +[shortcut] +from=-9 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_2_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_2_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV8 - MBConv6 - 1 (1) +# dropout only before residual connection +[dropout] +probability=.2 + +# block_6_2 +[shortcut] +from=-9 +activation=linear + +# conv_6_3_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_3_dwise +[convolutional] +groups=960 +filters=960 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_3_linear +[convolutional] +filters=320 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV9 - Conv2d 1x1 +# conv_6_4 +[convolutional] +filters=1280 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + + +[avgpool] + +[dropout] +probability=.2 + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=0 +activation=linear + +[softmax] +groups=1 + +#[cost] +#type=sse + From be5d0d66933e50585688bc86bb42786de55893ab Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 3 Sep 2019 01:35:05 +0300 Subject: [PATCH 16/86] Added assisted_excitation=1 for [convolutional] layer on GPU --- include/darknet.h | 3 + src/conv_lstm_layer.c | 22 ++-- src/convolutional_kernels.cu | 191 +++++++++++++++++++++++++++++++++++ src/convolutional_layer.c | 25 +++-- src/convolutional_layer.h | 3 +- src/crnn_layer.c | 6 +- src/maxpool_layer.c | 2 +- src/parser.c | 6 +- 8 files changed, 232 insertions(+), 26 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index a7a62b47bd7..e78abe6a5c9 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -537,6 +537,9 @@ struct layer { float * rand_gpu; float * squared_gpu; float * norms_gpu; + + float *gt_gpu; + float *a_avg_gpu; #ifdef CUDNN cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; cudnnTensorDescriptor_t srcTensorDesc16, dstTensorDesc16; diff --git a/src/conv_lstm_layer.c b/src/conv_lstm_layer.c index a6da3bf0c2c..4ae67b44a83 100644 --- a/src/conv_lstm_layer.c +++ b/src/conv_lstm_layer.c @@ -66,44 +66,44 @@ layer 
make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // U l.uf = (layer*)calloc(1, sizeof(layer)); - *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.uf->batch = batch; if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; l.ui = (layer*)calloc(1, sizeof(layer)); - *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.ui->batch = batch; if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; l.ug = (layer*)calloc(1, sizeof(layer)); - *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.ug->batch = batch; if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size; l.uo = (layer*)calloc(1, sizeof(layer)); - *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.uo->batch = batch; if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size; // W l.wf = (layer*)calloc(1, sizeof(layer)); - *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.wf->batch = batch; if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; l.wi = (layer*)calloc(1, sizeof(layer)); - *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.wi->batch = batch; if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; l.wg = (layer*)calloc(1, sizeof(layer)); - *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, 
activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.wg->batch = batch; if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; l.wo = (layer*)calloc(1, sizeof(layer)); - *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.wo->batch = batch; if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size; @@ -111,21 +111,21 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // V l.vf = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.vf->batch = batch; if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size; } l.vi = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.vi->batch = batch; if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size; } l.vo = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.vo->batch = batch; if (l.workspace_size < l.vo->workspace_size) l.workspace_size = l.vo->workspace_size; } diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index b476ac76e3d..566fb893335 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -605,6 +605,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) fix_nan_and_inf(l.output_gpu, l.outputs*l.batch); } + if(l.assisted_excitation && state.train) assisted_excitation_forward_gpu(l, state); + if (l.antialiasing) { network_state s = { 0 }; s.train = state.train; @@ -890,6 +892,195 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state } } +static box float_to_box_stride(float *f, int stride) +{ + box b = { 0 }; + b.x = f[0]; + b.y = f[1 * stride]; + b.w = f[2 * stride]; + b.h = f[3 * stride]; + return b; +} + +__global__ void calc_avg_activation_kernel(float *src, float *dst, int size, int channels, int batches) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + int xy = i % size; + int b = i / size; + + if (i < size*batches) { + dst[i] = 0; + for (int c = 0; c < channels; ++c) { + dst[i] += src[xy + size*(c + 
channels*b)]; + } + dst[i] = dst[i] / channels; + } +} + +#include <iostream> + +void calc_avg_activation_gpu(float *src, float *dst, int size, int channels, int batches) +{ + const int num_blocks = get_number_of_blocks(size*batches, BLOCK); + + std::cout << " size = " << size << ", channels = " << channels << ", batches = " << batches << std::endl; + calc_avg_activation_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (src, dst, size, channels, batches); +} + + +__global__ void assisted_activation_kernel(float alpha, float *output, float *gt_gpu, float *a_avg_gpu, int size, int channels, int batches) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + int xy = i % size; + int b = i / size; + + if (b < batches) { + for (int c = 0; c < channels; ++c) { + output[xy + size*(c + channels*b)] += alpha * gt_gpu[i] * a_avg_gpu[i]; + } + } +} + +void assisted_activation_gpu(float alpha, float *output, float *gt_gpu, float *a_avg_gpu, int size, int channels, int batches) +{ + const int num_blocks = get_number_of_blocks(size*batches, BLOCK); + + assisted_activation_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); +} + +void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) +{ + const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions); + + // epoch + const float epoch = (float)(*state.net.seen) / state.net.train_images_num; + + // calculate alpha + //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches); + //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches); + const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2; + + //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n", + // epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num); + + //const int size = l.outputs * l.batch; + + float *a_avg = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float)); + float *gt = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float)); + + int b; + int w, h, c; + + l.max_boxes = state.net.num_boxes; + l.truths = l.max_boxes*(4 + 1); + + int num_truth = l.batch*l.truths; + float *truth_cpu = (float *)calloc(num_truth, sizeof(float)); + cuda_pull_array(state.truth, truth_cpu, num_truth); + //cudaStreamSynchronize(get_cuda_stream()); + //CHECK_CUDA(cudaPeekAtLastError()); + + for (b = 0; b < l.batch; ++b) + { + // calculate G + int t; + for (t = 0; t < state.net.num_boxes; ++t) { + box truth = float_to_box_stride(truth_cpu + t*(4 + 1) + b*l.truths, 1); + if (!truth.x) break; // continue; + + int left = floor((truth.x - truth.w / 2) * l.out_w); + int right = ceil((truth.x + truth.w / 2) * l.out_w); + int top = floor((truth.y - truth.h / 2) * l.out_h); + int bottom = ceil((truth.y + truth.h / 2) * l.out_h); + + for (w = left; w <= right; w++) { + for (h = top; h < bottom; h++) { + gt[w + l.out_w * h + l.out_w*l.out_h*b] = 1; + } + } + } + } + + cuda_push_array(l.gt_gpu, gt, l.out_w * l.out_h * l.batch); + //cudaStreamSynchronize(get_cuda_stream()); + //CHECK_CUDA(cudaPeekAtLastError()); + + // calc avg_output on GPU - for whole batch + calc_avg_activation_gpu(l.output_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); + //cudaStreamSynchronize(get_cuda_stream()); + //CHECK_CUDA(cudaPeekAtLastError()); + + // calc new output + assisted_activation_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); + //cudaStreamSynchronize(get_cuda_stream()); + //CHECK_CUDA(cudaPeekAtLastError()); + 
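+ // summary of the math above: alpha = (1 + cos(pi * iteration_num / max_batches)) / 2 + // decays smoothly from 1 to 0 over training; gt(i,j) is 1 inside any ground-truth box + // and 0 elsewhere; a_avg(i,j) is the mean activation over channels; so every channel + // gets output(c,i,j) += alpha * gt(i,j) * a_avg(i,j), i.e. activations are boosted only + // inside object regions, with the boost fading out as training progresses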
+ + /* + for (b = 0; b < l.batch; ++b) + { + // calculate average A + for (w = 0; w < l.out_w; w++) { + for (h = 0; h < l.out_h; h++) { + for (c = 0; c < l.out_c; c++) { + a_avg[w + l.out_w*(h + l.out_h*b)] += l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))]; + } + a_avg[w + l.out_w*(h + l.out_h*b)] /= l.out_c; // a_avg / d + } + } + } + + // change activation + for (b = 0; b < l.batch; ++b) + { + for (w = 0; w < l.out_w; w++) { + for (h = 0; h < l.out_h; h++) { + for (c = 0; c < l.out_c; c++) + { + // a = a + alpha(t) + e(c,i,j) = a + alpha(t) + g(i,j) * avg_a(i,j) / channels + l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] += + alpha * + g[w + l.out_w*(h + l.out_h*b)] * + a_avg[w + l.out_w*(h + l.out_h*b)]; + + //l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] = + // alpha * g[w + l.out_w*(h + l.out_h*b)] * a_avg[w + l.out_w*(h + l.out_h*b)]; + } + } + } + } + */ + + if (0) // visualize ground truth + { +#ifdef OPENCV + cuda_pull_array(l.output_gpu, l.output, l.outputs * l.batch); + cudaStreamSynchronize(get_cuda_stream()); + CHECK_CUDA(cudaPeekAtLastError()); + + for (b = 0; b < l.batch; ++b) + { + image img = float_to_image(l.out_w, l.out_h, 1, &gt[l.out_w*l.out_h*b]); + char buff[100]; + sprintf(buff, "a_excitation_%d", b); + show_image_cv(img, buff); + + image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]); + char buff2[100]; + sprintf(buff2, "a_excitation_act_%d", b); + show_image_cv(img2, buff2); + wait_key_cv(5); + } + wait_until_press_key_cv(); +#endif // OPENCV + } + + free(truth_cpu); + free(gt); + free(a_avg); +} + void pull_convolutional_layer(convolutional_layer l) { cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index c5c59576349..157058eb91c 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -332,7 +332,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) #endif #endif -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer) +convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer, int assisted_excitation) { int total_batch = batch*steps; int i; @@ -349,6 +349,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, stride_x = stride_y = l.stride = l.stride_x = l.stride_y = 1; // use stride=1 in host-layer } + l.assisted_excitation = assisted_excitation; l.share_layer = share_layer; l.index = index; l.h = h; @@ -503,7 +504,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, #ifdef CUDNN_HALF l.weights_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); l.weight_updates_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); -#endif +#endif // CUDNN_HALF l.biases_gpu = cuda_make_array(l.biases, n); l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); } @@ -547,19 +548,27 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); 
l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); } + + if (l.assisted_excitation) + { + const int size = l.out_w * l.out_h * l.batch; + l.gt_gpu = cuda_make_array(NULL, size); + l.a_avg_gpu = cuda_make_array(NULL, size); + } #ifdef CUDNN create_convolutional_cudnn_tensors(&l); cudnn_convolutional_setup(&l, cudnn_fastest); -#endif +#endif // CUDNN } -#endif +#endif // GPU l.workspace_size = get_convolutional_workspace_size(l); //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); l.bflops = (2.0 * l.nweights * l.out_h*l.out_w) / 1000000000.; if (l.xnor && l.use_bin_output) fprintf(stderr, "convXB"); else if (l.xnor) fprintf(stderr, "convX "); - else if(l.share_layer) fprintf(stderr, "convS "); + else if (l.share_layer) fprintf(stderr, "convS "); + else if (l.assisted_excitation) fprintf(stderr, "convAE"); else fprintf(stderr, "conv "); if (groups > 1) fprintf(stderr, "%5d/%4d ", n, groups); @@ -579,7 +588,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, printf("AA: "); l.input_layer = (layer*)calloc(1, sizeof(layer)); const int blur_size = 3; - *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL); + *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL, 0); const int blur_nweights = n * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; int i; for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { @@ -636,7 +645,7 @@ void denormalize_convolutional_layer(convolutional_layer l) void test_convolutional_layer() { - convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, 0, NULL); + convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, 0, NULL, 0); l.batch_normalize = 1; float data[] = {1,1,1,1,1, 1,1,1,1,1, @@ -1236,7 +1245,7 @@ void assisted_excitation_forward(convolutional_layer l, network_state state) } } - if(0) // visualize ground truth + if(1) // visualize ground truth { #ifdef OPENCV for (b = 0; b < l.batch; ++b) diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index 1012663a5b3..0072ce549c3 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -30,7 +30,7 @@ void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); #endif size_t get_convolutional_workspace_size(layer l); -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer); +convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer, int assisted_excitation); void denormalize_convolutional_layer(convolutional_layer l); void resize_convolutional_layer(convolutional_layer *layer, int w, int h); void forward_convolutional_layer(const 
convolutional_layer layer, network_state state); @@ -57,6 +57,7 @@ int convolutional_out_width(convolutional_layer layer); void rescale_weights(convolutional_layer l, float scale, float trans); void rgbgr_weights(convolutional_layer l); void assisted_excitation_forward(convolutional_layer l, network_state state); +void assisted_excitation_forward_gpu(convolutional_layer l, network_state state); #ifdef __cplusplus } diff --git a/src/crnn_layer.c b/src/crnn_layer.c index e3114fc9497..588db7411a0 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -50,17 +50,17 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou l.state = (float*)calloc(l.hidden * l.batch * (l.steps + 1), sizeof(float)); l.input_layer = (layer*)calloc(1, sizeof(layer)); - *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.input_layer->batch = batch; if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; l.self_layer = (layer*)calloc(1, sizeof(layer)); - *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.self_layer->batch = batch; if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size; l.output_layer = (layer*)calloc(1, sizeof(layer)); - *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL); + *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); l.output_layer->batch = batch; if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 1239262197d..27d338603d1 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -107,7 +107,7 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s printf("AA: "); l.input_layer = (layer*)calloc(1, sizeof(layer)); const int blur_size = 3; - *(l.input_layer) = make_convolutional_layer(batch, 1, l.out_h, l.out_w, l.out_c, l.out_c, l.out_c, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, 1, 0, NULL); + *(l.input_layer) = make_convolutional_layer(batch, 1, l.out_h, l.out_w, l.out_c, l.out_c, l.out_c, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, 1, 0, NULL, 0); const int blur_nweights = l.out_c * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; int i; for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { diff --git a/src/parser.c b/src/parser.c index b89bf0acc7b..97d6aef9c96 100644 --- a/src/parser.c +++ b/src/parser.c @@ -170,6 +170,8 @@ convolutional_layer parse_convolutional(list 
*options, size_params params, netwo
     char *activation_s = option_find_str(options, "activation", "logistic");
     ACTIVATION activation = get_activation(activation_s);
 
+    int assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0);
+
     int share_index = option_find_int_quiet(options, "share_index", -1000000000);
     convolutional_layer *share_layer = NULL;
     if(share_index >= 0) share_layer = &net.layers[share_index];
@@ -186,10 +188,10 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo
     int xnor = option_find_int_quiet(options, "xnor", 0);
     int use_bin_output = option_find_int_quiet(options, "bin_output", 0);
 
-    convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer);
+    convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer, assisted_excitation);
     layer.flipped = option_find_int_quiet(options, "flipped", 0);
     layer.dot = option_find_float_quiet(options, "dot", 0);
-    layer.assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0);
+
 
     if(params.net.adam){
         layer.B1 = params.net.B1;

From 9c02df864e32259292e3189a0879b361165eadfb Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Wed, 4 Sep 2019 18:50:56 +0300
Subject: [PATCH 17/86] Fixed assisted_excitation and added also for
 [shortcut] layer

---
 src/convolutional_kernels.cu | 33 ++++++++++++++++++++++++++-------
 src/convolutional_layer.c | 9 +++++++--
 src/image.c | 14 ++++++++++++++
 src/image.h | 1 +
 src/layer.c | 4 ++++
 src/parser.c | 3 ++-
 src/shortcut_layer.c | 21 +++++++++++++++++----
 src/shortcut_layer.h | 2 +-
 8 files changed, 72 insertions(+), 15 deletions(-)

diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 566fb893335..d766c9cf7cb 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -917,13 +917,10 @@ __global__ void calc_avg_activation_kernel(float *src, float *dst, int size, int
     }
 }
 
-#include <iostream>
-
 void calc_avg_activation_gpu(float *src, float *dst, int size, int channels, int batches)
 {
     const int num_blocks = get_number_of_blocks(size*batches, BLOCK);
-    std::cout << " size = " << size << ", channels = " << channels << ", batches = " << batches << std::endl;
 
     calc_avg_activation_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (src, dst, size, channels, batches);
 }
@@ -937,6 +934,9 @@ __global__ void assisted_activation_kernel(float alpha, float *output, float *gt
     if (b < batches) {
         for (int c = 0; c < channels; ++c) {
             output[xy + size*(c + channels*b)] += alpha * gt_gpu[i] * a_avg_gpu[i];
+            //output[xy + size*(c + channels*b)] += gt_gpu[i] * a_avg_gpu[i];
+            //output[xy + size*(c + channels*b)] += gt_gpu[i] * output[xy + size*(c + channels*b)];
+            //output[xy + size*(c + channels*b)] = a_avg_gpu[i];
         }
     }
 }
@@ -953,12 +953,18 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
     const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
 
     // epoch
-    const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
+    //const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
 
     // calculate alpha
     //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches);
     //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches);
-    const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2;
+    //const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2;
+    float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches));
+
+    if (l.assisted_excitation > 1) {
+        if (iteration_num > l.assisted_excitation) alpha = 0;
+        else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation));
+    }
 
     //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n",
     //    epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num);
@@ -969,7 +975,7 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
     float *gt = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float));
 
     int b;
-    int w, h, c;
+    int w, h;
 
     l.max_boxes = state.net.num_boxes;
     l.truths = l.max_boxes*(4 + 1);
@@ -1061,15 +1067,28 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
 
         for (b = 0; b < l.batch; ++b)
         {
+            printf(" Assisted Excitation alpha = %f \n", alpha);
             image img = float_to_image(l.out_w, l.out_h, 1, &gt[l.out_w*l.out_h*b]);
             char buff[100];
             sprintf(buff, "a_excitation_%d", b);
             show_image_cv(img, buff);
 
-            image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]);
+            //image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]);
+            image img2 = float_to_image_scaled(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]);
             char buff2[100];
             sprintf(buff2, "a_excitation_act_%d", b);
             show_image_cv(img2, buff2);
+
+            /*
+            int c = l.out_c;
+            if (c > 4) c = 4;
+            image img3 = float_to_image(l.out_w, l.out_h, c, &l.output[l.out_w*l.out_h*l.out_c*b]);
+            image dc = collapse_image_layers(img3, 1);
+            char buff3[100];
+            sprintf(buff3, "a_excitation_act_collapsed_%d", b);
+            show_image_cv(dc, buff3);
+            */
+
             wait_key_cv(5);
         }
         wait_until_press_key_cv();
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 157058eb91c..72bb602589e 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -1172,12 +1172,17 @@ void assisted_excitation_forward(convolutional_layer l, network_state state)
     const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
 
     // epoch
-    const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
+    //const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
 
     // calculate alpha
     //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches);
     //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches);
-    const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2;
+    float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches));
+
+    if (l.assisted_excitation > 1) {
+        if (iteration_num > l.assisted_excitation) alpha = 0;
+        else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation));
+    }
 
     //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n",
     //    epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num);
diff --git a/src/image.c b/src/image.c
index 2f085801dbe..8befaa2b8c6 100644
--- a/src/image.c
+++ b/src/image.c
@@ -770,6 +770,20 @@ image make_random_image(int w, int h, int c)
     return out;
 }
 
+image float_to_image_scaled(int w, int h, int c, float *data)
+{
+    image out = make_image(w, h, c);
+    int abs_max = 0;
+    int i = 0;
+    for (i = 0; i < w*h*c; ++i) {
+        if
(fabs(data[i]) > abs_max) abs_max = fabs(data[i]); + } + for (i = 0; i < w*h*c; ++i) { + out.data[i] = data[i] / abs_max; + } + return out; +} + image float_to_image(int w, int h, int c, float *data) { image out = make_empty_image(w,h,c); diff --git a/src/image.h b/src/image.h index 3a1c5b9a73c..14792c9b9ff 100644 --- a/src/image.h +++ b/src/image.h @@ -79,6 +79,7 @@ void print_image(image m); //LIB_API image make_image(int w, int h, int c); image make_random_image(int w, int h, int c); image make_empty_image(int w, int h, int c); +image float_to_image_scaled(int w, int h, int c, float *data); image float_to_image(int w, int h, int c, float *data); image copy_image(image p); image load_image(char *filename, int w, int h, int c); diff --git a/src/layer.c b/src/layer.c index b6ae95dba12..e9ae67b5ff5 100644 --- a/src/layer.c +++ b/src/layer.c @@ -157,6 +157,10 @@ void free_layer(layer l) if (l.x_gpu) cuda_free(l.x_gpu); // dont free if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); + // assisted excitation + if (l.gt_gpu) cuda_free(l.gt_gpu); + if (l.a_avg_gpu) cuda_free(l.a_avg_gpu); + if (l.align_bit_weights_gpu) cuda_free((float *)l.align_bit_weights_gpu); if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu); if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu); diff --git a/src/parser.c b/src/parser.c index 97d6aef9c96..4b56dfc4c10 100644 --- a/src/parser.c +++ b/src/parser.c @@ -601,6 +601,7 @@ layer parse_batchnorm(list *options, size_params params) layer parse_shortcut(list *options, size_params params, network net) { + int assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0); char *l = option_find(options, "from"); int index = atoi(l); if(index < 0) index = params.index + index; @@ -608,7 +609,7 @@ layer parse_shortcut(list *options, size_params params, network net) int batch = params.batch; layer from = net.layers[index]; - layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, assisted_excitation); char *activation_s = option_find_str(options, "activation", "linear"); ACTIVATION activation = get_activation(activation_s); diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index 1f7c6d35e90..d056a6a0a20 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -4,9 +4,10 @@ #include #include -layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation) { - fprintf(stderr,"Shortcut Layer: %d\n", index); + if(assisted_excitation) fprintf(stderr, "Shortcut Layer - AE: %d\n", index); + else fprintf(stderr,"Shortcut Layer: %d\n", index); layer l = { (LAYER_TYPE)0 }; l.type = SHORTCUT; l.batch = batch; @@ -19,6 +20,8 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.outputs = w*h*c; l.inputs = l.outputs; + l.assisted_excitation = assisted_excitation; + if(w != w2 || h != h2 || c != c2) fprintf(stderr, " w = %d, w2 = %d, h = %d, h2 = %d, c = %d, c2 = %d \n", w, w2, h, h2, c, c2); l.index = index; @@ -28,13 +31,19 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.forward = forward_shortcut_layer; l.backward = backward_shortcut_layer; - #ifdef GPU +#ifdef GPU l.forward_gpu = forward_shortcut_layer_gpu; l.backward_gpu = backward_shortcut_layer_gpu; l.delta_gpu = 
cuda_make_array(l.delta, l.outputs*batch); l.output_gpu = cuda_make_array(l.output, l.outputs*batch); - #endif + if (l.assisted_excitation) + { + const int size = l.out_w * l.out_h * l.batch; + l.gt_gpu = cuda_make_array(NULL, size); + l.a_avg_gpu = cuda_make_array(NULL, size); + } +#endif // GPU return l; } @@ -72,6 +81,8 @@ void forward_shortcut_layer(const layer l, network_state state) shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output); } activate_array(l.output, l.outputs*l.batch, l.activation); + + if (l.assisted_excitation && state.train) assisted_excitation_forward(l, state); } void backward_shortcut_layer(const layer l, network_state state) @@ -89,6 +100,8 @@ void forward_shortcut_layer_gpu(const layer l, network_state state) //shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); input_shortcut_gpu(state.input, l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); + + if (l.assisted_excitation && state.train) assisted_excitation_forward_gpu(l, state); } void backward_shortcut_layer_gpu(const layer l, network_state state) diff --git a/src/shortcut_layer.h b/src/shortcut_layer.h index b24aa3e6682..ad8d45f3e28 100644 --- a/src/shortcut_layer.h +++ b/src/shortcut_layer.h @@ -7,7 +7,7 @@ #ifdef __cplusplus extern "C" { #endif -layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation); void forward_shortcut_layer(const layer l, network_state state); void backward_shortcut_layer(const layer l, network_state state); void resize_shortcut_layer(layer *l, int w, int h); From 1e0b50c29e3c589ddbcccafc84b1ccd7c538c16b Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Wed, 4 Sep 2019 18:51:40 +0300 Subject: [PATCH 18/86] Added yolov3-tiny-prn.cfg and enet-coco.cfg (EfficientNetb0-Yolo- 45.5% mAP@0.5 - 3.7 BFlops) https://github.com/WongKinYiu/PartialResidualNetworks --- README.md | 6 + build/darknet/x64/cfg/enet-coco.cfg | 1072 +++++++++++++++++++++ build/darknet/x64/cfg/yolov3-tiny-prn.cfg | 199 ++++ cfg/enet-coco.cfg | 1072 +++++++++++++++++++++ cfg/yolov3-tiny-prn.cfg | 199 ++++ 5 files changed, 2548 insertions(+) create mode 100644 build/darknet/x64/cfg/enet-coco.cfg create mode 100644 build/darknet/x64/cfg/yolov3-tiny-prn.cfg create mode 100644 cfg/enet-coco.cfg create mode 100644 cfg/yolov3-tiny-prn.cfg diff --git a/README.md b/README.md index 03492ade7e8..b79d9786e1c 100644 --- a/README.md +++ b/README.md @@ -63,12 +63,18 @@ There are weights-file for different cfg-files (smaller size -> faster speed & l * `yolov3-spp.cfg` (240 MB COCO **Yolo v3**) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov3-spp.weights * `yolov3.cfg` (236 MB COCO **Yolo v3**) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov3.weights * `yolov3-tiny.cfg` (34 MB COCO **Yolo v3 tiny**) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov3-tiny.weights +* `enet-coco.cfg` (EfficientNetb0-Yolo- 45.5% mAP@0.5 - 3.7 BFlops) [enetb0-coco_final.weights](https://drive.google.com/file/d/1FlHeQjWEQVJt0ay1PVsiuuMzmtNyv36m/view) and `yolov3-tiny-prn.cfg` (33.1% mAP@0.5 - 3.5 BFlops - [more](https://github.com/WongKinYiu/PartialResidualNetworks)) + +
CLICK ME - Yolo v2 models + * `yolov2.cfg` (194 MB COCO Yolo v2) - requires 4 GB GPU-RAM: https://pjreddie.com/media/files/yolov2.weights * `yolo-voc.cfg` (194 MB VOC Yolo v2) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo-voc.weights * `yolov2-tiny.cfg` (43 MB COCO Yolo v2) - requires 1 GB GPU-RAM: https://pjreddie.com/media/files/yolov2-tiny.weights * `yolov2-tiny-voc.cfg` (60 MB VOC Yolo v2) - requires 1 GB GPU-RAM: http://pjreddie.com/media/files/yolov2-tiny-voc.weights * `yolo9000.cfg` (186 MB Yolo9000-model) - requires 4 GB GPU-RAM: http://pjreddie.com/media/files/yolo9000.weights +
+ Put it near compiled: darknet.exe You can get cfg-files by path: `darknet/cfg/` diff --git a/build/darknet/x64/cfg/enet-coco.cfg b/build/darknet/x64/cfg/enet-coco.cfg new file mode 100644 index 00000000000..b530ed360b3 --- /dev/null +++ b/build/darknet/x64/cfg/enet-coco.cfg @@ -0,0 +1,1072 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +### CONV1 - 1 (1) +# conv1 +[convolutional] +filters=32 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +### CONV2 - MBConv1 - 1 (1) +# conv2_1_expand +[convolutional] +filters=32 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv2_1_dwise +[convolutional] +groups=32 +filters=32 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=4 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=32 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv2_1_linear +[convolutional] +filters=16 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV3 - MBConv6 - 1 (2) +# conv2_2_expand +[convolutional] +filters=96 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv2_2_dwise +[convolutional] +groups=96 +filters=96 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=8 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=96 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv2_2_linear +[convolutional] +filters=24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 2 (2) +# conv3_1_expand +[convolutional] +filters=144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv3_1_dwise +[convolutional] +groups=144 +filters=144 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=144 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv3_1_linear +[convolutional] +filters=24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV4 - MBConv6 - 1 (2) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_3_1 +[shortcut] +from=-9 +activation=linear + +# conv_3_2_expand +[convolutional] +filters=144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_3_2_dwise +[convolutional] +groups=144 +filters=144 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=144 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_3_2_linear +[convolutional] +filters=40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV4 - MBConv6 - 2 (2) +# conv_4_1_expand 
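# (every MBConv block in this file follows the same pattern: a 1x1 "expand"
# convolution, a depthwise convolution with groups equal to filters, a
# squeeze-and-excitation branch, and a 1x1 linear projection; the [shortcut]
# from=-9 stanzas close the residual connection around the nine preceding layers)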
+[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_1_dwise +[convolutional] +groups=192 +filters=192 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=192 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_1_linear +[convolutional] +filters=40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + + +### CONV5 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_4_2 +[shortcut] +from=-9 +activation=linear + +# conv_4_3_expand +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_3_dwise +[convolutional] +groups=192 +filters=192 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=192 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_3_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 2 (3) +# conv_4_4_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_4_dwise +[convolutional] +groups=384 +filters=384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_4_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_4_4 +[shortcut] +from=-9 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_5_dwise +[convolutional] +groups=384 +filters=384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_5_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV6 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_4_6 +[shortcut] +from=-9 +activation=linear + +# conv_4_7_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_7_dwise +[convolutional] +groups=384 +filters=384 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# 
conv_4_7_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 2 (3) +# conv_5_1_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_1_dwise +[convolutional] +groups=576 +filters=576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_1_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_5_1 +[shortcut] +from=-9 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_2_dwise +[convolutional] +groups=576 +filters=576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_2_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 1 (4) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_5_2 +[shortcut] +from=-9 +activation=linear + +# conv_5_3_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_3_dwise +[convolutional] +groups=576 +filters=576 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_3_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 2 (4) +# conv_6_1_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_1_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_1_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 3 (4) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_6_1 +[shortcut] +from=-9 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_2_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 
+activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_2_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 4 (4) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_6_1 +[shortcut] +from=-9 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_2_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_2_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV8 - MBConv6 - 1 (1) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_6_2 +[shortcut] +from=-9 +activation=linear + +# conv_6_3_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_3_dwise +[convolutional] +groups=960 +filters=960 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_3_linear +[convolutional] +filters=320 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV9 - Conv2d 1x1 +# conv_6_4 +[convolutional] +filters=1280 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +########################## + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-2 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[shortcut] +activation=leaky +from=90 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-3 + +[shortcut] +activation=leaky +from=90 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 1,2,3 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + diff --git a/build/darknet/x64/cfg/yolov3-tiny-prn.cfg b/build/darknet/x64/cfg/yolov3-tiny-prn.cfg new file mode 100644 index 00000000000..109c969cb2a --- /dev/null +++ b/build/darknet/x64/cfg/yolov3-tiny-prn.cfg @@ -0,0 +1,199 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +#batch=64 +#subdivisions=8 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 
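# (burn_in ramps the learning rate up gradually over the first 1000 iterations;
# with policy=steps the rate is then multiplied by the scales values, 0.1 at
# iterations 400000 and 450000 below)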
+max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-3 + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-2 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[shortcut] +activation=leaky +from=8 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-3 + +[shortcut] +activation=leaky +from=8 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 1,2,3 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/cfg/enet-coco.cfg b/cfg/enet-coco.cfg new file mode 100644 index 00000000000..b530ed360b3 --- /dev/null +++ b/cfg/enet-coco.cfg @@ -0,0 +1,1072 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +### CONV1 - 1 (1) +# conv1 +[convolutional] +filters=32 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +### CONV2 - MBConv1 - 1 (1) +# conv2_1_expand +[convolutional] +filters=32 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv2_1_dwise +[convolutional] +groups=32 +filters=32 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=4 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=32 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv2_1_linear +[convolutional] +filters=16 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV3 - MBConv6 - 1 (2) +# conv2_2_expand +[convolutional] +filters=96 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv2_2_dwise +[convolutional] +groups=96 +filters=96 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + 
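# (in the squeeze-and-excitation branch that follows, [avgpool] collapses each
# channel to a single value, a 1x1 convolution squeezes the channel count, a
# second 1x1 convolution with logistic activation produces per-channel gates,
# and [scale_channels] from=-4 multiplies those gates back onto the feature map)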
+#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=8 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=96 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv2_2_linear +[convolutional] +filters=24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 2 (2) +# conv3_1_expand +[convolutional] +filters=144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv3_1_dwise +[convolutional] +groups=144 +filters=144 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=144 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv3_1_linear +[convolutional] +filters=24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV4 - MBConv6 - 1 (2) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_3_1 +[shortcut] +from=-9 +activation=linear + +# conv_3_2_expand +[convolutional] +filters=144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_3_2_dwise +[convolutional] +groups=144 +filters=144 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=8 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=144 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_3_2_linear +[convolutional] +filters=40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV4 - MBConv6 - 2 (2) +# conv_4_1_expand +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_1_dwise +[convolutional] +groups=192 +filters=192 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=192 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_1_linear +[convolutional] +filters=40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + + +### CONV5 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_4_2 +[shortcut] +from=-9 +activation=linear + +# conv_4_3_expand +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_3_dwise +[convolutional] +groups=192 +filters=192 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=16 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=192 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_3_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 2 (3) +# conv_4_4_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_4_dwise +[convolutional] +groups=384 +filters=384 +size=3 +stride=1 +pad=1 +batch_normalize=1 
+activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_4_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_4_4 +[shortcut] +from=-9 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_5_dwise +[convolutional] +groups=384 +filters=384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_5_linear +[convolutional] +filters=80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV6 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_4_6 +[shortcut] +from=-9 +activation=linear + +# conv_4_7_expand +[convolutional] +filters=384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_4_7_dwise +[convolutional] +groups=384 +filters=384 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=24 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=384 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_4_7_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 2 (3) +# conv_5_1_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_1_dwise +[convolutional] +groups=576 +filters=576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_1_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_5_1 +[shortcut] +from=-9 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_2_dwise +[convolutional] +groups=576 +filters=576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_2_linear +[convolutional] +filters=112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 1 (4) +# dropout only before residual connection +[dropout] 
+probability=.0 + +# block_5_2 +[shortcut] +from=-9 +activation=linear + +# conv_5_3_expand +[convolutional] +filters=576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_5_3_dwise +[convolutional] +groups=576 +filters=576 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=32 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=576 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_5_3_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 2 (4) +# conv_6_1_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_1_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_1_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 3 (4) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_6_1 +[shortcut] +from=-9 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_2_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_2_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 4 (4) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_6_1 +[shortcut] +from=-9 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_2_dwise +[convolutional] +groups=960 +filters=960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 +stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_2_linear +[convolutional] +filters=192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV8 - MBConv6 - 1 (1) +# dropout only before residual connection +[dropout] +probability=.0 + +# block_6_2 +[shortcut] +from=-9 +activation=linear + +# conv_6_3_expand +[convolutional] +filters=960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +# conv_6_3_dwise +[convolutional] +groups=960 +filters=960 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=swish + + +#squeeze-n-excitation +[avgpool] + +# squeeze ratio r=16 (recommended r=16) +[convolutional] +filters=64 +size=1 +stride=1 +activation=swish + +# excitation +[convolutional] +filters=960 +size=1 
+stride=1 +activation=logistic + +# multiply channels +[scale_channels] +from=-4 + + +# conv_6_3_linear +[convolutional] +filters=320 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV9 - Conv2d 1x1 +# conv_6_4 +[convolutional] +filters=1280 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=swish + +########################## + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-2 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[shortcut] +activation=leaky +from=90 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-3 + +[shortcut] +activation=leaky +from=90 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 1,2,3 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=0 + diff --git a/cfg/yolov3-tiny-prn.cfg b/cfg/yolov3-tiny-prn.cfg new file mode 100644 index 00000000000..109c969cb2a --- /dev/null +++ b/cfg/yolov3-tiny-prn.cfg @@ -0,0 +1,199 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +#batch=64 +#subdivisions=8 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=16 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=1 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-3 + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +activation=leaky +from=-2 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[shortcut] +activation=leaky +from=8 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + 
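# (partial-residual shortcut: element-wise addition with the output of the layer
# three back; when the two inputs have different channel counts, darknet's
# shortcut sums only the overlapping channels, which is the "partial residual"
# idea behind PRN)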
+[shortcut] +activation=leaky +from=-3 + +[shortcut] +activation=leaky +from=8 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 1,2,3 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 From f6fa4a56d938f4f8c69774d3622e768e7411507d Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Wed, 4 Sep 2019 19:58:36 +0300 Subject: [PATCH 19/86] compile fix --- build/darknet/x64/cfg/yolov3-tiny-prn.cfg | 8 ++++---- cfg/yolov3-tiny-prn.cfg | 8 ++++---- src/shortcut_layer.c | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/build/darknet/x64/cfg/yolov3-tiny-prn.cfg b/build/darknet/x64/cfg/yolov3-tiny-prn.cfg index 109c969cb2a..215162e973b 100644 --- a/build/darknet/x64/cfg/yolov3-tiny-prn.cfg +++ b/build/darknet/x64/cfg/yolov3-tiny-prn.cfg @@ -1,10 +1,10 @@ [net] # Testing -batch=1 -subdivisions=1 +#batch=1 +#subdivisions=1 # Training -#batch=64 -#subdivisions=8 +batch=64 +subdivisions=8 width=416 height=416 channels=3 diff --git a/cfg/yolov3-tiny-prn.cfg b/cfg/yolov3-tiny-prn.cfg index 109c969cb2a..215162e973b 100644 --- a/cfg/yolov3-tiny-prn.cfg +++ b/cfg/yolov3-tiny-prn.cfg @@ -1,10 +1,10 @@ [net] # Testing -batch=1 -subdivisions=1 +#batch=1 +#subdivisions=1 # Training -#batch=64 -#subdivisions=8 +batch=64 +subdivisions=8 width=416 height=416 channels=3 diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index d056a6a0a20..8345858e47d 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -1,4 +1,5 @@ #include "shortcut_layer.h" +#include "convolutional_layer.h" #include "dark_cuda.h" #include "blas.h" #include From e33019e669cbfe3dfc9037664335bfcb6ae8b988 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 5 Sep 2019 14:47:26 +0300 Subject: [PATCH 20/86] Fixed stride_x and stride_y. Fixed AntiAliasing. Minor fixes. 
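
The key fix here is a swapped pair of assignments in make_convolutional_layer():
the function stored stride_x into l.stride_y and stride_y into l.stride_x, so any
layer configured with asymmetric strides ran with the two axes exchanged. A
minimal before/after sketch (the full change is in the diff below):

    /* before (buggy): the axes were swapped */
    l.stride_y = stride_x;
    l.stride_x = stride_y;

    /* after (fixed): each axis keeps its own stride */
    l.stride_x = stride_x;
    l.stride_y = stride_y;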
--- src/convolutional_layer.c | 4 ++-- src/data.c | 12 ++++++++++-- src/image_opencv.cpp | 13 +++++++++++-- src/network.c | 7 +++++++ src/sam_layer.c | 4 ++-- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 72bb602589e..00b27f6b88f 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -363,8 +363,8 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.batch = batch; l.steps = steps; l.stride = stride_x; - l.stride_y = stride_x; - l.stride_x = stride_y; + l.stride_x = stride_x; + l.stride_y = stride_y; l.dilation = dilation; l.size = size; l.pad = padding; diff --git a/src/data.c b/src/data.c index 6dd3274ae7b..4206871533f 100644 --- a/src/data.c +++ b/src/data.c @@ -343,7 +343,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int free(boxes); } -void fill_truth_detection(const char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy, +int fill_truth_detection(const char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy, int net_w, int net_h) { char labelpath[4096]; @@ -352,6 +352,7 @@ void fill_truth_detection(const char *path, int num_boxes, float *truth, int cla int count = 0; int i; box_label *boxes = read_boxes(labelpath, &count); + int min_w_h = 0; float lowest_w = 1.F / net_w; float lowest_h = 1.F / net_h; randomize_boxes(boxes, count); @@ -424,8 +425,13 @@ void fill_truth_detection(const char *path, int num_boxes, float *truth, int cla truth[(i-sub)*5+2] = w; truth[(i-sub)*5+3] = h; truth[(i-sub)*5+4] = id; + + if (min_w_h == 0) min_w_h = w*net_w; + if (min_w_h > w*net_w) min_w_h = w*net_w; + if (min_w_h > h*net_h) min_w_h = h*net_h; } free(boxes); + return min_w_h; } @@ -914,7 +920,9 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo float dy = ((float)ptop / oh) / sy; - fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. / sy, w, h); + int min_w_h = fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. 
/ sy, w, h); + + if (min_w_h < blur*4) blur = 0; // disable blur if one of the objects is too small image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp, blur, boxes, d.y.vals[i]); diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index 91d07bd7a8b..b6cb65f9f2a 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -1206,11 +1206,20 @@ image image_data_augmentation(mat_cv* mat, int w, int h, if (blur) { cv::Mat dst(sized.size(), sized.type()); if(blur == 1) cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0); - else cv::GaussianBlur(sized, dst, cv::Size((blur / 2) * 2 + 1, (blur / 2) * 2 + 1), 0); - cv::Rect img_rect(0, 0, sized.cols, sized.rows); + else { + cv::Size kernel_size = cv::Size((blur / 2) * 2 + 1, (blur / 2) * 2 + 1); + cv::GaussianBlur(sized, dst, kernel_size, 0); + + // sharpen + //cv::Mat img_tmp; + //cv::GaussianBlur(dst, img_tmp, cv::Size(), 3); + //cv::addWeighted(dst, 1.5, img_tmp, -0.5, 0, img_tmp); + //dst = img_tmp; + } //std::cout << " blur num_boxes = " << num_boxes << std::endl; if (blur == 1) { + cv::Rect img_rect(0, 0, sized.cols, sized.rows); int t; for (t = 0; t < num_boxes; ++t) { box b = float_to_box_stride(truth + t*(4 + 1), 1); diff --git a/src/network.c b/src/network.c index 9bdab8adce1..82dc4d53978 100644 --- a/src/network.c +++ b/src/network.c @@ -1150,6 +1150,13 @@ void copy_weights_net(network net_train, network *net_map) copy_cudnn_descriptors(tmp_self_layer, net_map->layers[k].self_layer); copy_cudnn_descriptors(tmp_output_layer, net_map->layers[k].output_layer); } + else if(l->input_layer) // for AntiAliasing + { + layer tmp_input_layer; + copy_cudnn_descriptors(*net_map->layers[k].input_layer, &tmp_input_layer); + net_map->layers[k].input_layer = net_train.layers[k].input_layer; + copy_cudnn_descriptors(tmp_input_layer, net_map->layers[k].input_layer); + } net_map->layers[k].batch = 1; net_map->layers[k].steps = 1; } diff --git a/src/sam_layer.c b/src/sam_layer.c index da28e319775..70e55052883 100644 --- a/src/sam_layer.c +++ b/src/sam_layer.c @@ -60,7 +60,7 @@ void resize_sam_layer(layer *l, int w, int h) void forward_sam_layer(const layer l, network_state state) { int size = l.batch * l.out_c * l.out_w * l.out_h; - int channel_size = 1; + //int channel_size = 1; float *from_output = state.net.layers[l.index].output; int i; @@ -79,7 +79,7 @@ void backward_sam_layer(const layer l, network_state state) //scale_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); int size = l.batch * l.out_c * l.out_w * l.out_h; - int channel_size = 1; + //int channel_size = 1; float *from_output = state.net.layers[l.index].output; float *from_delta = state.net.layers[l.index].delta; From b0b1584a069c12a214791c339bbff9dc3904283e Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 6 Sep 2019 01:53:36 +0300 Subject: [PATCH 21/86] Minor fix --- src/convolutional_layer.c | 2 +- src/maxpool_layer.c | 8 +++++--- src/sam_layer.c | 2 +- src/scale_channels_layer.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 00b27f6b88f..10a45bab5d5 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -574,7 +574,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, if (groups > 1) fprintf(stderr, "%5d/%4d ", n, groups); else fprintf(stderr, "%5d ", n); - if (stride_x != stride_y) fprintf(stderr, "%2d x%2d/%2dx%2d ", size, size, stride_x, stride_y); + if 
(stride_x != stride_y) fprintf(stderr, "%2dx%2d/%2dx%2d ", size, size, stride_x, stride_y); else { if (dilation > 1) fprintf(stderr, "%2d x%2d/%2d(%1d)", size, size, stride_x, dilation); else fprintf(stderr, "%2d x%2d/%2d ", size, size, stride_x); diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 27d338603d1..2f290497dcb 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -98,10 +98,12 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s #endif // GPU l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; - if(stride_x == stride_y) - fprintf(stderr, "max %d x %d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + if (maxpool_depth) + fprintf(stderr, "max-depth %2dx%2d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + else if(stride_x == stride_y) + fprintf(stderr, "max %2dx%2d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); else - fprintf(stderr, "max %d x %d/%2dx%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, stride_y, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); + fprintf(stderr, "max %2dx%2d/%2dx%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, stride_y, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); if (l.antialiasing) { printf("AA: "); diff --git a/src/sam_layer.c b/src/sam_layer.c index 70e55052883..e95290d7867 100644 --- a/src/sam_layer.c +++ b/src/sam_layer.c @@ -18,7 +18,7 @@ layer make_sam_layer(int batch, int index, int w, int h, int c, int w2, int h2, l.out_h = h2; l.out_c = c2; assert(l.out_c == l.c); - assert(l.w == l.out_w & l.h == l.out_h); + assert(l.w == l.out_w && l.h == l.out_h); l.outputs = l.out_w*l.out_h*l.out_c; l.inputs = l.outputs; diff --git a/src/scale_channels_layer.c b/src/scale_channels_layer.c index 7322570116e..80be5361126 100644 --- a/src/scale_channels_layer.c +++ b/src/scale_channels_layer.c @@ -13,7 +13,7 @@ layer make_scale_channels_layer(int batch, int index, int w, int h, int c, int w l.w = w; l.h = h; l.c = c; - assert(w == 1 & h == 1); + assert(w == 1 && h == 1); l.out_w = w2; l.out_h = h2; From 1c71f001531a5df0637903117c6568725d7a66b3 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 7 Sep 2019 15:15:19 +0300 Subject: [PATCH 22/86] Fixed shortcut_layer for antialiasing --- src/data.c | 3 +++ src/image_opencv.cpp | 7 +++++-- src/parser.c | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/data.c b/src/data.c index 4206871533f..d8f0c9e81c0 100644 --- a/src/data.c +++ b/src/data.c @@ -959,6 +959,9 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo save_image(tmp_ai, buff); if (show_imgs == 1) { + //char buff_src[1000]; + //sprintf(buff_src, "src_%d_%d_%s_%d", random_index, i, basecfg((char*)filename), random_gen()); + //show_image_mat(src, buff_src); show_image(tmp_ai, buff); wait_until_press_key_cv(); } diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index b6cb65f9f2a..cc50c0719a1 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -1207,8 +1207,11 @@ image image_data_augmentation(mat_cv* mat, int w, int h, cv::Mat dst(sized.size(), sized.type()); if(blur == 1) cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0); else { - cv::Size kernel_size = cv::Size((blur / 2) * 2 + 1, (blur / 2) * 2 + 1); - cv::GaussianBlur(sized, dst, kernel_size, 0); + int ksize = (blur / 2) * 2 + 1; 
+ cv::Size kernel_size = cv::Size(ksize, ksize); + //cv::GaussianBlur(sized, dst, kernel_size, 0); + //cv::medianBlur(sized, dst, ksize); + cv::bilateralFilter(sized, dst, ksize, 75, 75); // sharpen //cv::Mat img_tmp; diff --git a/src/parser.c b/src/parser.c index 4b56dfc4c10..829134d1131 100644 --- a/src/parser.c +++ b/src/parser.c @@ -608,6 +608,7 @@ layer parse_shortcut(list *options, size_params params, network net) int batch = params.batch; layer from = net.layers[index]; + if (from.antialiasing) from = *from.input_layer; layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, assisted_excitation); From fa74f691cb624d916503af309de13300638d097f Mon Sep 17 00:00:00 2001 From: "gilberto.plaza" Date: Wed, 18 Sep 2019 13:49:54 +0200 Subject: [PATCH 23/86] Removed useless snippet that only breaks if batch processing is enabled --- src/yolo_layer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/yolo_layer.c b/src/yolo_layer.c index a76b5efb082..20ee8e34391 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -492,7 +492,9 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, //printf("\n l.batch = %d, l.w = %d, l.h = %d, l.n = %d \n", l.batch, l.w, l.h, l.n); int i,j,n; float *predictions = l.output; - if (l.batch == 2) avg_flipped_yolo(l); + // This snippet below is not necessary + // Need to comment it in order to batch processing >= 2 images + //if (l.batch == 2) avg_flipped_yolo(l); int count = 0; for (i = 0; i < l.w*l.h; ++i){ int row = i / l.w; From 6f4d93bb9f59a805e6884789c97623e98806450d Mon Sep 17 00:00:00 2001 From: Alexey Date: Sun, 22 Sep 2019 17:38:42 +0300 Subject: [PATCH 24/86] Update readme.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b79d9786e1c..36019663e86 100644 --- a/README.md +++ b/README.md @@ -569,7 +569,7 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol * each: `model of object, side, illimination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. - * recalculate anchors for your dataset for `width` and `height` from cfg-file: + * Only if you are an **expert** in neural detection networks - recalculate anchors for your dataset for `width` and `height` from cfg-file: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file. But you should change indexes of anchors `masks=` for each [yolo]-layer, so that 1st-[yolo]-layer has anchors larger than 60x60, 2nd larger than 30x30, 3rd remaining. Also you should change the `filters=(classes + 5)*` before each [yolo]-layer. If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors. 
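
As a concrete illustration of the bookkeeping described in the note above - the 9 recalculated anchors shared by all 3 `[yolo]`-layers, the per-layer `mask` indexes, and `filters=(classes + 5)*3` before each `[yolo]`-layer - here is a sketch for `classes=2` (the anchors shown are the yolov3 defaults; all other values are illustrative only):

```
# hypothetical fragment for classes=2; anchors are the yolov3 defaults,
# the remaining values are illustrative only
[convolutional]
size=1
stride=1
pad=1
# filters = (classes + 5)*3 = (2 + 5)*3
filters=21
activation=linear

[yolo]
# the 1st [yolo]-layer takes the 3 largest of the 9 anchors (larger than 60x60)
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=2
num=9
```

The 2nd and 3rd `[yolo]`-layers repeat the same filters/mask arithmetic, only with `mask = 3,4,5` and `mask = 0,1,2` respectively.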
From 05545b260784cb5ce9b27f85462c4a31137b684b Mon Sep 17 00:00:00 2001
From: Alexey
Date: Tue, 24 Sep 2019 15:31:28 +0300
Subject: [PATCH 25/86] Create FUNDING.yml

---
 .github/FUNDING.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 .github/FUNDING.yml

diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000000..0c5ae2e2b8f
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,12 @@
+# These are supported funding model platforms
+
+github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+custom: ['https://paypal.me/alexeyab84', 'https://blockchain.coinmarketcap.com/address/bitcoin/36La9T7DoLVMrUQzm6rBDGsxutyvDzbHnp', 'https://etherscan.io/address/0x193d56BE3C65e3Fb8f48c291B17C0702e211A588#', 'https://explorer.zcha.in/accounts/t1PzwJ28Prb7Nk8fgfT3RXCr6Xtw54tgjoy'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

From 2fa539779f4e12e264b9e1b2fc463ac7edec165c Mon Sep 17 00:00:00 2001
From: Alexey
Date: Fri, 27 Sep 2019 22:35:56 +0300
Subject: [PATCH 26/86] Readme.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 36019663e86..ca3c2d9f37d 100644
--- a/README.md
+++ b/README.md
@@ -540,6 +540,8 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol
 * desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects

+ * What is the best way to mark objects: label only the visible part of the object, or label the visible and overlapped part of the object, or label a little more than the entire object (with a little gap)? Mark objects the way you would like them to be detected.
+
 * for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last `[yolo]`-layer or `[region]`-layer in your cfg-file (the global maximum number of objects that can be detected by YoloV3 is `0.0615234375*(width*height)` where width and height are parameters from `[net]` section in cfg-file)

 * for training for small objects (smaller than 16x16 after the image is resized to 416x416) - set `layers = -1, 11` instead of https://github.com/AlexeyAB/darknet/blob/6390a5a2ab61a0bdf6f1a9a6b4a739c16b36e0d7/cfg/yolov3.cfg#L720

From ff0b739e4357afba477c25f37fba08410ec86b04 Mon Sep 17 00:00:00 2001
From: Alexey
Date: Thu, 3 Oct 2019 12:12:00 +0300
Subject: [PATCH 27/86] Update Readme.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index ca3c2d9f37d..29e5174451e 100644
--- a/README.md
+++ b/README.md
@@ -451,11 +451,11 @@ Usually sufficient 2000 iterations for each class(object), but not less than 400
 > Region Avg IOU: 0.798363, Class: 0.893232, Obj: 0.700808, No Obj: 0.004567, Avg Recall: 1.000000, count: 8
 > Region Avg IOU: 0.800677, Class: 0.892181, Obj: 0.701590, No Obj: 0.004574, Avg Recall: 1.000000, count: 8
 >
- > **9002**: 0.211667, **0.060730 avg**, 0.001000 rate, 3.868000 seconds, 576128 images
+ > **9002**: 0.211667, **0.60730 avg**, 0.001000 rate, 3.868000 seconds, 576128 images
 > Loaded: 0.000000 seconds

 * **9002** - iteration number (number of batch)
- * **0.060730 avg** - average loss (error) - **the lower, the better**
+ * **0.60730 avg** - average loss (error) - **the lower, the better**

 When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final average loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset).

From 7181c7435f6ccc99b9c9340eccb5bfd16826804e Mon Sep 17 00:00:00 2001
From: Alexey
Date: Thu, 3 Oct 2019 16:40:08 +0300
Subject: [PATCH 28/86] Update readme.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 29e5174451e..fa44665be17 100644
--- a/README.md
+++ b/README.md
@@ -333,7 +333,7 @@ Training Yolo v3:
 * change line batch to [`batch=64`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L3)
 * change line subdivisions to [`subdivisions=8`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L4)
- * change line max_batches to (`classes*2000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes
+ * change line max_batches to (`classes*2000` but not less than `4000`), f.e. [`max_batches=6000`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L20) if you train for 3 classes
 * change line steps to 80% and 90% of max_batches, f.e. [`steps=4800,5400`](https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L22)
 * change line `classes=80` to your number of objects in each of 3 `[yolo]`-layers:
 * https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610

From e24c96dc8bfe151c9c39684c34345f7981b3a08e Mon Sep 17 00:00:00 2001
From: Alexey
Date: Thu, 3 Oct 2019 17:28:06 +0300
Subject: [PATCH 29/86] Update Readme.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index fa44665be17..8027726284f 100644
--- a/README.md
+++ b/README.md
@@ -536,6 +536,8 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol
 * check that each object that you want to detect is labeled in your dataset - not a single object in your dataset should be left without a label. In most training issues there are wrong labels in your dataset (got labels by using some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark

+ * my Loss is very high and mAP is very low - is training going wrong? Run training with the ` -show_imgs` flag at the end of the training command - do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If not - your training dataset is wrong.
+
 * for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So it is desirable that your training dataset includes images with objects at different: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more

 * desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects

From 0823d04247573d5371733ce6755ba143e1591108 Mon Sep 17 00:00:00 2001
From: acxz <17132214+acxz@users.noreply.github.com>
Date: Fri, 18 Oct 2019 19:24:49 -0400
Subject: [PATCH 30/86] Add readability changes

Make CMake-GUI install more visible than vcpkg install
---
 README.md | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 2f85a17c15e..8f14d167ba9 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,8 @@ More details: http://pjreddie.com/darknet/yolo/
     * [Using cmake](#how-to-compile-on-linux-using-cmake)
     * [Using make](#how-to-compile-on-linux-using-make)
 3. How to compile on Windows
+    * [Using CMake-GUI](#how-to-compile-on-windows-using-cmake-gui)
     * [Using vcpkg](#how-to-compile-on-windows-using-vcpkg)
-    * [Using Cmake-GUI](#how-to-compile-on-windows-using-cmake-gui)
     * [Legacy way](#how-to-compile-on-windows-legacy-way)
 4. [How to train (Pascal VOC Data)](#how-to-train-pascal-voc-data)
 5. [How to train with multi-GPU:](#how-to-train-with-multi-gpu)
@@ -159,7 +159,7 @@ The `CMakeLists.txt` will attempt to find installed optional dependencies like
 CUDA, cudnn, ZED and build against those. It will also create a shared object
 library file to use `darknet` for code development.
 
-Inside the cloned repository:
+Do this inside the cloned repository:

 ```
 mkdir build-release
 cd build-release
 cmake ..
 make
 make install
 ```

@@ -187,9 +187,28 @@ Before make, you can set such options in the `Makefile`: [link](https://github.c
 To run Darknet on Linux use examples from this article, just use `./darknet` instead of `darknet.exe`, i.e. use this command: `./darknet detector test ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights`

+### How to compile on Windows (using `CMake-GUI`)
+
+This is the recommended approach to build Darknet on Windows if you have already
+installed Visual Studio 2015/2017/2019, CUDA > 10.0, cuDNN > 7.0, and
+OpenCV > 2.4.
+
+Use `CMake-GUI` as shown in this [**IMAGE**](https://user-images.githubusercontent.com/4096485/55107892-6becf380-50e3-11e9-9a0a-556a943c429a.png):
+
+1. Configure
+2. Optional platform for generator (Set: x64)
+3. Finish
+4. Generate
+5. Open Project
+6. Set: x64 & Release
+7. Build
+8. Build solution
+
 ### How to compile on Windows (using `vcpkg`)

-If you have already installed Visual Studio 2015/2017/2019, CUDA > 10.0, cuDNN > 7.0, OpenCV > 2.4, then compile Darknet by using `C:\Program Files\CMake\bin\cmake-gui.exe` as on this [**IMAGE**](https://user-images.githubusercontent.com/4096485/55107892-6becf380-50e3-11e9-9a0a-556a943c429a.png): Configure -> Optional platform for generator (Set: x64) -> Finish -> Generate -> Open Project -> x64 & Release -> Build -> Build solution
+If you have already installed Visual Studio 2015/2017/2019, CUDA > 10.0,
+cuDNN > 7.0, OpenCV > 2.4, then to compile Darknet it is recommended to use
+[CMake-GUI](#how-to-compile-on-windows-using-cmake-gui).

 Otherwise, follow these steps:

@@ -216,19 +235,6 @@ PS Code\vcpkg> .\vcpkg install pthreads opencv[ffmpeg] #replace with ope
 9. Open Powershell, go to the `darknet` folder and build with the command `.\build.ps1`. If you want to use Visual Studio, you will find two custom solutions created for you by CMake after the build, one in `build_win_debug` and the other in `build_win_release`, containing all the appropriate config flags for your system.

-### How to compile on Windows (using `Cmake-GUI`)
-
-Using `Cmake-GUI` as shown here on this [**IMAGE**](https://user-images.githubusercontent.com/4096485/55107892-6becf380-50e3-11e9-9a0a-556a943c429a.png):
-
-1. Configure
-2. Optional platform for generator (Set: x64)
-3. Finish
-4. Generate
-5. Open Project
-6. x64 & Release
-7. Build
-8. Build solution
-
 ### How to compile on Windows (legacy way)

 1. If you have **CUDA 10.0, cuDNN 7.4 and OpenCV 3.x** (with paths: `C:\opencv_3.0\opencv\build\include` & `C:\opencv_3.0\opencv\build\x64\vc14\lib`), then open `build\darknet\darknet.sln`, set **x64** and **Release** https://hsto.org/webt/uh/fk/-e/uhfk-eb0q-hwd9hsxhrikbokd6u.jpeg and do the: Build -> Build darknet. 
Also add Windows system variable `CUDNN` with path to CUDNN: https://user-images.githubusercontent.com/4096485/53249764-019ef880-36ca-11e9-8ffe-d9cf47e7e462.jpg From 2eb68d5177d43d6927753631a0d4fad5446cab17 Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 19 Oct 2019 15:09:15 +0300 Subject: [PATCH 31/86] Update Readme.md --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 2aed1180a43..9042a6f99e7 100644 --- a/README.md +++ b/README.md @@ -625,6 +625,13 @@ Here you can find repository with GUI-software for marking bounded boxes of obje With example of: `train.txt`, `obj.names`, `obj.data`, `yolo-obj.cfg`, `air`1-6`.txt`, `bird`1-4`.txt` for 2 classes of objects (air, bird) and `train_obj.cmd` with example how to train this image-set with Yolo v2 & v3 +Different tools for marking objects in images: +1. in C++: https://github.com/AlexeyAB/Yolo_mark +2. in Python: https://github.com/tzutalin/labelImg +3. in Python: https://github.com/Cartucho/OpenLabeling +4. in C++: https://www.ccoderun.ca/darkmark/ + + ## Using Yolo9000 Simultaneous detection and classification of 9000 objects: `darknet.exe detector test cfg/combine9k.data cfg/yolo9000.cfg yolo9000.weights data/dog.jpg` From e6486ab594e877e0b870eab6788de9e888c35840 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 21 Oct 2019 15:33:01 +0300 Subject: [PATCH 32/86] Use ignore_thresh only if class_id matched. Temporary changed Assisted_Excitation (reduces background activations rather than enhancing objects activations). Added antialiasiong=2 for 2x2. --- src/convolutional_kernels.cu | 40 +++++++++++++++++---- src/convolutional_layer.c | 70 +++++++++++++++++++++--------------- src/detector.c | 3 +- src/http_stream.cpp | 2 +- src/maxpool_layer.c | 69 +++++++++++++++++++++-------------- src/yolo_layer.c | 32 ++++++++++++++++- 6 files changed, 151 insertions(+), 65 deletions(-) diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index d766c9cf7cb..edfb03b81c7 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -948,6 +948,30 @@ void assisted_activation_gpu(float alpha, float *output, float *gt_gpu, float *a assisted_activation_kernel << > > (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); } + +__global__ void assisted_activation2_kernel(float alpha, float *output, float *gt_gpu, float *a_avg_gpu, int size, int channels, int batches) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + int xy = i % size; + int b = i / size; + float beta = 1 - alpha; + + if (b < batches) { + for (int c = 0; c < channels; ++c) { + if(gt_gpu[i] == 0) + output[xy + size*(c + channels*b)] *= beta; + + } + } +} + +void assisted_activation2_gpu(float alpha, float *output, float *gt_gpu, float *a_avg_gpu, int size, int channels, int batches) +{ + const int num_blocks = get_number_of_blocks(size*batches, BLOCK); + + assisted_activation2_kernel << > > (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); +} + void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) { const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions); @@ -958,12 +982,13 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) // calculate alpha //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches); //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches); - //const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2; - float 
alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)); + float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2; + //float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)); if (l.assisted_excitation > 1) { - if (iteration_num > l.assisted_excitation) alpha = 0; - else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation)); + if (iteration_num < state.net.burn_in) alpha = 0; + else if (iteration_num > l.assisted_excitation) alpha = 0; + else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation)) / 2; } //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n", @@ -1017,7 +1042,8 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) //CHECK_CUDA(cudaPeekAtLastError()); // calc new output - assisted_activation_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); + assisted_activation2_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); + //assisted_activation_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); //cudaStreamSynchronize(get_cuda_stream()); //CHECK_CUDA(cudaPeekAtLastError()); @@ -1070,13 +1096,13 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) printf(" Assisted Excitation alpha = %f \n", alpha); image img = float_to_image(l.out_w, l.out_h, 1, >[l.out_w*l.out_h*b]); char buff[100]; - sprintf(buff, "a_excitation_%d", b); + sprintf(buff, "a_excitation_gt_%d", b); show_image_cv(img, buff); //image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]); image img2 = float_to_image_scaled(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]); char buff2[100]; - sprintf(buff2, "a_excitation_act_%d", b); + sprintf(buff2, "a_excitation_output_%d", b); show_image_cv(img2, buff2); /* diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 10a45bab5d5..8bce5aa67d9 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -587,36 +587,50 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, if (l.antialiasing) { printf("AA: "); l.input_layer = (layer*)calloc(1, sizeof(layer)); - const int blur_size = 3; - *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL, 0); + int blur_size = 3; + int blur_pad = blur_size / 2; + if (l.antialiasing == 2) { + blur_size = 2; + blur_pad = 0; + } + *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_pad, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL, 0); const int blur_nweights = n * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; int i; - for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { - /* - l.input_layer->weights[i + 0] = 0; - l.input_layer->weights[i + 1] = 0; - l.input_layer->weights[i + 2] = 0; - - l.input_layer->weights[i + 3] = 0; - l.input_layer->weights[i + 4] = 1; - l.input_layer->weights[i + 5] = 0; - - l.input_layer->weights[i + 6] = 0; - l.input_layer->weights[i + 7] = 0; - l.input_layer->weights[i + 8] = 0; - */ - l.input_layer->weights[i + 0] = 1 / 16.f; - l.input_layer->weights[i + 1] = 2 / 16.f; - l.input_layer->weights[i + 2] = 1 / 16.f; - - l.input_layer->weights[i + 3] = 2 / 16.f; - l.input_layer->weights[i + 4] = 4 / 16.f; - l.input_layer->weights[i 
+ 5] = 2 / 16.f; - - l.input_layer->weights[i + 6] = 1 / 16.f; - l.input_layer->weights[i + 7] = 2 / 16.f; - l.input_layer->weights[i + 8] = 1 / 16.f; - + if (blur_size == 2) { + for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { + l.input_layer->weights[i + 0] = 1 / 4.f; + l.input_layer->weights[i + 1] = 1 / 4.f; + l.input_layer->weights[i + 2] = 1 / 4.f; + l.input_layer->weights[i + 3] = 1 / 4.f; + } + } + else { + for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { + /* + l.input_layer->weights[i + 0] = 0; + l.input_layer->weights[i + 1] = 0; + l.input_layer->weights[i + 2] = 0; + + l.input_layer->weights[i + 3] = 0; + l.input_layer->weights[i + 4] = 1; + l.input_layer->weights[i + 5] = 0; + + l.input_layer->weights[i + 6] = 0; + l.input_layer->weights[i + 7] = 0; + l.input_layer->weights[i + 8] = 0; + */ + l.input_layer->weights[i + 0] = 1 / 16.f; + l.input_layer->weights[i + 1] = 2 / 16.f; + l.input_layer->weights[i + 2] = 1 / 16.f; + + l.input_layer->weights[i + 3] = 2 / 16.f; + l.input_layer->weights[i + 4] = 4 / 16.f; + l.input_layer->weights[i + 5] = 2 / 16.f; + + l.input_layer->weights[i + 6] = 1 / 16.f; + l.input_layer->weights[i + 7] = 2 / 16.f; + l.input_layer->weights[i + 8] = 1 / 16.f; + } } for (i = 0; i < n; ++i) l.input_layer->biases[i] = 0; #ifdef GPU diff --git a/src/detector.c b/src/detector.c index 689ace6da83..efe5571aa8d 100644 --- a/src/detector.c +++ b/src/detector.c @@ -798,7 +798,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa replace_image_to_label(path, labelpath); int num_labels = 0; box_label *truth = read_boxes(labelpath, &num_labels); - int i, j; + int j; for (j = 0; j < num_labels; ++j) { truth_classes_count[truth[j].id]++; } @@ -818,6 +818,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa const int checkpoint_detections_count = detections_count; + int i; for (i = 0; i < nboxes; ++i) { int class_id; diff --git a/src/http_stream.cpp b/src/http_stream.cpp index 3ce2a212bca..af2e6730c74 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -48,7 +48,7 @@ static int close_socket(SOCKET s) { cerr << "Close socket: out = " << close_output << ", in = " << close_input << " \n"; return result; } -#else // nix +#else // _WIN32 - else: nix #include "darkunistd.h" #include #include diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 2f290497dcb..dca9c1b15f7 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -108,35 +108,50 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s if (l.antialiasing) { printf("AA: "); l.input_layer = (layer*)calloc(1, sizeof(layer)); - const int blur_size = 3; - *(l.input_layer) = make_convolutional_layer(batch, 1, l.out_h, l.out_w, l.out_c, l.out_c, l.out_c, blur_size, blur_stride_x, blur_stride_y, 1, blur_size / 2, LINEAR, 0, 0, 0, 0, 0, 1, 0, NULL, 0); + int blur_size = 3; + int blur_pad = blur_size / 2; + if (l.antialiasing == 2) { + blur_size = 2; + blur_pad = 0; + } + *(l.input_layer) = make_convolutional_layer(batch, 1, l.out_h, l.out_w, l.out_c, l.out_c, l.out_c, blur_size, blur_stride_x, blur_stride_y, 1, blur_pad, LINEAR, 0, 0, 0, 0, 0, 1, 0, NULL, 0); const int blur_nweights = l.out_c * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; int i; - for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { - /* - l.input_layer->weights[i + 0] = 0; - l.input_layer->weights[i + 1] = 0; - l.input_layer->weights[i + 2] = 0; - - l.input_layer->weights[i + 3] = 0; - 
l.input_layer->weights[i + 4] = 1; - l.input_layer->weights[i + 5] = 0; - - l.input_layer->weights[i + 6] = 0; - l.input_layer->weights[i + 7] = 0; - l.input_layer->weights[i + 8] = 0; - */ - l.input_layer->weights[i + 0] = 1 / 16.f; - l.input_layer->weights[i + 1] = 2 / 16.f; - l.input_layer->weights[i + 2] = 1 / 16.f; - - l.input_layer->weights[i + 3] = 2 / 16.f; - l.input_layer->weights[i + 4] = 4 / 16.f; - l.input_layer->weights[i + 5] = 2 / 16.f; - - l.input_layer->weights[i + 6] = 1 / 16.f; - l.input_layer->weights[i + 7] = 2 / 16.f; - l.input_layer->weights[i + 8] = 1 / 16.f; + if (blur_size == 2) { + for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { + l.input_layer->weights[i + 0] = 1 / 4.f; + l.input_layer->weights[i + 1] = 1 / 4.f; + l.input_layer->weights[i + 2] = 1 / 4.f; + l.input_layer->weights[i + 3] = 1 / 4.f; + } + } + else { + for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { + /* + l.input_layer->weights[i + 0] = 0; + l.input_layer->weights[i + 1] = 0; + l.input_layer->weights[i + 2] = 0; + + l.input_layer->weights[i + 3] = 0; + l.input_layer->weights[i + 4] = 1; + l.input_layer->weights[i + 5] = 0; + + l.input_layer->weights[i + 6] = 0; + l.input_layer->weights[i + 7] = 0; + l.input_layer->weights[i + 8] = 0; + */ + l.input_layer->weights[i + 0] = 1 / 16.f; + l.input_layer->weights[i + 1] = 2 / 16.f; + l.input_layer->weights[i + 2] = 1 / 16.f; + + l.input_layer->weights[i + 3] = 2 / 16.f; + l.input_layer->weights[i + 4] = 4 / 16.f; + l.input_layer->weights[i + 5] = 2 / 16.f; + + l.input_layer->weights[i + 6] = 1 / 16.f; + l.input_layer->weights[i + 7] = 2 / 16.f; + l.input_layer->weights[i + 8] = 1 / 16.f; + } } for (i = 0; i < l.out_c; ++i) l.input_layer->biases[i] = 0; #ifdef GPU diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 20ee8e34391..2006f4b8047 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -128,6 +128,26 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw return b; } + +int get_yolo_class(float *output, int classes, int class_index, int stride, float objectness) +{ + int class_id = 0; + float max_prob = FLT_MIN; + + int j; + for (j = 0; j < classes; ++j) { + float prob = objectness * output[class_index + stride*j]; + if (prob > max_prob) { + max_prob = prob; + class_id = j; + } + //int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); + //float prob = objectness*predictions[class_index]; + //dets[count].prob[j] = (prob > thresh) ? 
prob : 0; + } + return class_id; +} + ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss) { ious all_ious = { 0 }; @@ -272,6 +292,7 @@ void forward_yolo_layer(const layer l, network_state state) box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h); float best_iou = 0; int best_t = 0; + int class_id_match = 0; for (t = 0; t < l.max_boxes; ++t) { box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; @@ -282,8 +303,17 @@ void forward_yolo_layer(const layer l, network_state state) continue; // if label contains class_id more than number of classes in the cfg-file } if (!truth.x) break; // continue; + + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); + int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); + float objectness = l.output[obj_index]; + int pred_class_id = get_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness); + if (class_id == pred_class_id) class_id_match = 1; + else class_id_match = 0; + float iou = box_iou(pred, truth); - if (iou > best_iou) { + //if (iou > best_iou) { + if (iou > best_iou && class_id_match == 1) { best_iou = iou; best_t = t; } From bb7d69941cbce4f67f10406395d685ea92be9478 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 21 Oct 2019 15:49:46 +0300 Subject: [PATCH 33/86] Added debugging info for Training Classifier for case: Too many or too few labels --- src/data.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/data.c b/src/data.c index d8f0c9e81c0..7cb7bf0a998 100644 --- a/src/data.c +++ b/src/data.c @@ -498,7 +498,16 @@ void fill_truth(char *path, char **labels, int k, float *truth) ++count; } } - if(count != 1) printf("Too many or too few labels: %d, %s\n", count, path); + if (count != 1) { + printf("Too many or too few labels: %d, %s\n", count, path); + count = 0; + for (i = 0; i < k; ++i) { + if (strstr(path, labels[i])) { + printf("\t label %d: %s \n", count, labels[i]); + count++; + } + } + } } void fill_hierarchy(float *truth, int k, tree *hierarchy) From b3a24952985352f81d759095aa3b38c4e761c342 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 25 Oct 2019 20:47:17 +0300 Subject: [PATCH 34/86] Added Gaussian YOLOv3 layer [Gaussian_yolo] --- Makefile | 2 +- build/darknet/darknet.vcxproj | 2 + include/darknet.h | 2 + src/box.c | 10 + src/box.h | 1 + src/convolutional_kernels.cu | 11 +- src/convolutional_layer.c | 11 +- src/data.c | 11 +- src/gaussian_yolo_layer.c | 445 ++++++++++++++++++++++++++++++++++ src/gaussian_yolo_layer.h | 20 ++ src/network.c | 16 ++ src/parser.c | 65 +++++ src/yolo_layer.c | 10 - 13 files changed, 565 insertions(+), 41 deletions(-) create mode 100644 src/gaussian_yolo_layer.c create mode 100644 src/gaussian_yolo_layer.h diff --git a/Makefile b/Makefile index 25a85f81855..41e5fc8d737 100644 --- a/Makefile +++ b/Makefile @@ -118,7 +118,7 @@ LDFLAGS+= -L/usr/local/zed/lib -lsl_core -lsl_input -lsl_zed #-lstdc++ -D_GLIBCXX_USE_CXX11_ABI=0 endif -OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o 
writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o +OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index d7dc91590fa..b685bebd50b 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -199,6 +199,7 @@ + @@ -263,6 +264,7 @@ + diff --git a/include/darknet.h b/include/darknet.h index e78abe6a5c9..00b49921f52 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -149,6 +149,7 @@ typedef enum { XNOR, REGION, YOLO, + GAUSSIAN_YOLO, ISEG, REORG, REORG_OLD, @@ -728,6 +729,7 @@ typedef struct detection{ float *mask; float objectness; int sort_class; + float *uc; // Gaussian_YOLOv3 - tx,ty,tw,th uncertainty } detection; // matrix.h diff --git a/src/box.c b/src/box.c index 1b5c4998a6b..c6a27ed587f 100644 --- a/src/box.c +++ b/src/box.c @@ -13,6 +13,16 @@ box float_to_box(float *f) return b; } +box float_to_box_stride(float *f, int stride) +{ + box b = { 0 }; + b.x = f[0]; + b.y = f[1 * stride]; + b.w = f[2 * stride]; + b.h = f[3 * stride]; + return b; +} + dbox derivative(box a, box b) { dbox d; diff --git a/src/box.h b/src/box.h index 2392fedd20c..172c135293c 100644 --- a/src/box.h +++ b/src/box.h @@ -31,6 +31,7 @@ typedef struct detection_with_class { extern "C" { #endif box float_to_box(float *f); +box float_to_box_stride(float *f, int stride); float box_iou(box a, box b); float box_rmse(box a, box b); dxrep dx_box_iou(box a, box b, IOU_LOSS iou_loss); diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index edfb03b81c7..23005ccb91e 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -10,6 +10,7 @@ #include "col2im.h" #include "utils.h" #include "dark_cuda.h" +#include "box.h" __global__ void binarize_kernel(float *x, int n, float *binary) @@ -892,16 +893,6 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state } } -static box float_to_box_stride(float *f, int stride) -{ - box b = { 0 }; - b.x = f[0]; - b.y = f[1 * stride]; - b.w = f[2 * stride]; - b.h = f[3 * stride]; - return b; -} - __global__ void calc_avg_activation_kernel(float 
*src, float *dst, int size, int channels, int batches) { int i = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 8bce5aa67d9..6818b603529 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -5,6 +5,7 @@ #include "col2im.h" #include "blas.h" #include "gemm.h" +#include "box.h" #include #include @@ -1171,16 +1172,6 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) } } -static box float_to_box_stride(float *f, int stride) -{ - box b = { 0 }; - b.x = f[0]; - b.y = f[1 * stride]; - b.w = f[2 * stride]; - b.h = f[3 * stride]; - return b; -} - void assisted_excitation_forward(convolutional_layer l, network_state state) { const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions); diff --git a/src/data.c b/src/data.c index 7cb7bf0a998..c0af1ab857d 100644 --- a/src/data.c +++ b/src/data.c @@ -2,6 +2,7 @@ #include "utils.h" #include "image.h" #include "dark_cuda.h" +#include "box.h" #include #include @@ -779,16 +780,6 @@ data load_data_swag(char **paths, int n, int classes, float jitter) return d; } -static box float_to_box_stride(float *f, int stride) -{ - box b = { 0 }; - b.x = f[0]; - b.y = f[1 * stride]; - b.w = f[2 * stride]; - b.h = f[3 * stride]; - return b; -} - void blend_truth(float *new_truth, int boxes, float *old_truth) { const int t_size = 4 + 1; diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c new file mode 100644 index 00000000000..320834018d9 --- /dev/null +++ b/src/gaussian_yolo_layer.c @@ -0,0 +1,445 @@ +// Gaussian YOLOv3 implementation +// Author: Jiwoong Choi +// ICCV 2019 Paper: http://openaccess.thecvf.com/content_ICCV_2019/html/Choi_Gaussian_YOLOv3_An_Accurate_and_Fast_Object_Detector_Using_Localization_ICCV_2019_paper.html +// arxiv.org: https://arxiv.org/abs/1904.04620v2 +// source code: https://github.com/jwchoi384/Gaussian_YOLOv3 + +#include "gaussian_yolo_layer.h" +#include "activations.h" +#include "blas.h" +#include "box.h" +#include "dark_cuda.h" +#include "utils.h" + +#include +#include +#include +#include + +#ifndef M_PI +#define M_PI 3.141592 +#endif + +layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +{ + int i; + layer l = {0}; + l.type = GAUSSIAN_YOLO; + + l.n = n; + l.total = total; + l.batch = batch; + l.h = h; + l.w = w; + l.c = n*(classes + 8 + 1); + l.out_w = l.w; + l.out_h = l.h; + l.out_c = l.c; + l.classes = classes; + l.cost = calloc(1, sizeof(float)); + l.biases = calloc(total*2, sizeof(float)); + if(mask) l.mask = mask; + else{ + l.mask = calloc(n, sizeof(int)); + for(i = 0; i < n; ++i){ + l.mask[i] = i; + } + } + l.bias_updates = calloc(n*2, sizeof(float)); + l.outputs = h*w*n*(classes + 8 + 1); + l.inputs = l.outputs; + l.truths = 90*(4 + 1); + l.delta = calloc(batch*l.outputs, sizeof(float)); + l.output = calloc(batch*l.outputs, sizeof(float)); + for(i = 0; i < total*2; ++i){ + l.biases[i] = .5; + } + + l.forward = forward_gaussian_yolo_layer; + l.backward = backward_gaussian_yolo_layer; +#ifdef GPU + l.forward_gpu = forward_gaussian_yolo_layer_gpu; + l.backward_gpu = backward_gaussian_yolo_layer_gpu; + l.output_gpu = cuda_make_array(l.output, batch*l.outputs); + l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); +#endif + + fprintf(stderr, "Gaussian_yolo\n"); + srand(0); + + return l; +} + +void resize_gaussian_yolo_layer(layer *l, int w, int h) +{ + l->w = w; + l->h = h; + + l->outputs = h*w*l->n*(l->classes + 8 + 1); + l->inputs = 
l->outputs; + + l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + + l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); + l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); +#endif +} + +box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +{ + box b; + b.x = (i + x[index + 0*stride]) / lw; + b.y = (j + x[index + 2*stride]) / lh; + b.w = exp(x[index + 4*stride]) * biases[2*n] / w; + b.h = exp(x[index + 6*stride]) * biases[2*n+1] / h; + return b; +} + +float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +{ + box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + float iou = box_iou(pred, truth); + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2*n]); + float th = log(truth.h*h / biases[2*n + 1]); + + float sigma_const = 0.3; + float epsi = pow(10,-9); + + float in_exp_x = (tx - x[index + 0*stride])/x[index+1*stride]; + float in_exp_x_2 = pow(in_exp_x, 2); + float normal_dist_x = exp(in_exp_x_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+1*stride]+sigma_const)); + + float in_exp_y = (ty - x[index + 2*stride])/x[index+3*stride]; + float in_exp_y_2 = pow(in_exp_y, 2); + float normal_dist_y = exp(in_exp_y_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+3*stride]+sigma_const)); + + float in_exp_w = (tw - x[index + 4*stride])/x[index+5*stride]; + float in_exp_w_2 = pow(in_exp_w, 2); + float normal_dist_w = exp(in_exp_w_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+5*stride]+sigma_const)); + + float in_exp_h = (th - x[index + 6*stride])/x[index+7*stride]; + float in_exp_h_2 = pow(in_exp_h, 2); + float normal_dist_h = exp(in_exp_h_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+7*stride]+sigma_const)); + + float temp_x = (1./2.) * 1./(normal_dist_x+epsi) * normal_dist_x * scale; + float temp_y = (1./2.) * 1./(normal_dist_y+epsi) * normal_dist_y * scale; + float temp_w = (1./2.) * 1./(normal_dist_w+epsi) * normal_dist_w * scale; + float temp_h = (1./2.) 
* 1./(normal_dist_h+epsi) * normal_dist_h * scale; + + delta[index + 0*stride] = temp_x * in_exp_x * (1./x[index+1*stride]); + delta[index + 2*stride] = temp_y * in_exp_y * (1./x[index+3*stride]); + delta[index + 4*stride] = temp_w * in_exp_w * (1./x[index+5*stride]); + delta[index + 6*stride] = temp_h * in_exp_h * (1./x[index+7*stride]); + + delta[index + 1*stride] = temp_x * (in_exp_x_2/x[index+1*stride] - 1./(x[index+1*stride]+sigma_const)); + delta[index + 3*stride] = temp_y * (in_exp_y_2/x[index+3*stride] - 1./(x[index+3*stride]+sigma_const)); + delta[index + 5*stride] = temp_w * (in_exp_w_2/x[index+5*stride] - 1./(x[index+5*stride]+sigma_const)); + delta[index + 7*stride] = temp_h * (in_exp_h_2/x[index+7*stride] - 1./(x[index+7*stride]+sigma_const)); + return iou; +} + + +void delta_gaussian_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +{ + int n; + if (delta[index]){ + delta[index + stride*class] = 1 - output[index + stride*class]; + if(avg_cat) *avg_cat += output[index + stride*class]; + return; + } + for(n = 0; n < classes; ++n){ + delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; + if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + } +} + +static int entry_gaussian_index(layer l, int batch, int location, int entry) +{ + int n = location / (l.w*l.h); + int loc = location % (l.w*l.h); + return batch*l.outputs + n*l.w*l.h*(8+l.classes+1) + entry*l.w*l.h + loc; +} + +void forward_gaussian_yolo_layer(const layer l, network net) +{ + int i,j,b,t,n; + memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + +#ifndef GPU + for (b = 0; b < l.batch; ++b){ + for(n = 0; n < l.n; ++n){ + // x : mu, sigma + int index = entry_gaussian_index(l, b, n*l.w*l.h, 0); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + // y : mu, sigma + index = entry_gaussian_index(l, b, n*l.w*l.h, 2); + activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + // w : sigma + index = entry_gaussian_index(l, b, n*l.w*l.h, 5); + activate_array(l.output + index, l.w*l.h, LOGISTIC); + // h : sigma + index = entry_gaussian_index(l, b, n*l.w*l.h, 7); + activate_array(l.output + index, l.w*l.h, LOGISTIC); + // objectness & class + index = entry_gaussian_index(l, b, n*l.w*l.h, 8); + activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } +#endif + + memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); + if(!net.train) return; + float avg_iou = 0; + float recall = 0; + float recall75 = 0; + float avg_cat = 0; + float avg_obj = 0; + float avg_anyobj = 0; + int count = 0; + int class_count = 0; + *(l.cost) = 0; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); + box pred = get_gaussian_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); + float best_iou = 0; + int best_t = 0; + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box_stride(net.truth + t*(4 + 1) + b*l.truths, 1); + if(!truth.x) break; + float iou = box_iou(pred, truth); + if (iou > best_iou) { + best_iou = iou; + best_t = t; + } + } + int obj_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 8); + avg_anyobj += l.output[obj_index]; + l.delta[obj_index] = 0 - l.output[obj_index]; + if (best_iou > l.ignore_thresh) { + l.delta[obj_index] = 0; + } + if (best_iou > l.truth_thresh) { + l.delta[obj_index] = 1 - l.output[obj_index]; + + int 
class = net.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); + delta_gaussian_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + box truth = float_to_box_stride(net.truth + best_t*(4 + 1) + b*l.truths, 1); + delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + } + } + } + } + for(t = 0; t < l.max_boxes; ++t){ + box truth = float_to_box_stride(net.truth + t*(4 + 1) + b*l.truths, 1); + + if(!truth.x) break; + float best_iou = 0; + int best_n = 0; + i = (truth.x * l.w); + j = (truth.y * l.h); + box truth_shift = truth; + truth_shift.x = truth_shift.y = 0; + for(n = 0; n < l.total; ++n){ + box pred = {0}; + pred.w = l.biases[2*n]/net.w; + pred.h = l.biases[2*n+1]/net.h; + float iou = box_iou(pred, truth_shift); + if (iou > best_iou){ + best_iou = iou; + best_n = n; + } + } + + int mask_n = int_index(l.mask, best_n, l.n); + if(mask_n >= 0){ + int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + + int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = 1 - l.output[obj_index]; + + int class = net.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class = l.map[class]; + int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); + delta_gaussian_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); + + ++count; + ++class_count; + if(iou > .5) recall += 1; + if(iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); +} + +void backward_gaussian_yolo_layer(const layer l, network net) +{ + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); +} + +void correct_gaussian_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + int new_w=0; + int new_h=0; + if (((float)netw/w) < ((float)neth/h)) { + new_w = netw; + new_h = (h * netw)/w; + } else { + new_h = neth; + new_w = (w * neth)/h; + } + for (i = 0; i < n; ++i){ + box b = dets[i].bbox; + b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); + b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); + b.w *= (float)netw/new_w; + b.h *= (float)neth/new_h; + if(!relative){ + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + dets[i].bbox = b; + } +} + +int gaussian_yolo_num_detections(layer l, float thresh) +{ + int i, n; + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + for(n = 0; n < l.n; ++n){ + int obj_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 8); + if(l.output[obj_index] > thresh){ + ++count; + } + } + } + return count; +} + +/* +void avg_flipped_gaussian_yolo(layer l) +{ + int i,j,n,z; + float *flip = l.output + l.outputs; + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w/2; ++i) { + for (n = 0; n < l.n; ++n) { + for(z = 0; z < l.classes + 8 + 1; ++z){ + int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; + int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); + float swap = flip[i1]; 
+ flip[i1] = flip[i2]; + flip[i2] = swap; + if(z == 0){ + flip[i1] = -flip[i1]; + flip[i2] = -flip[i2]; + } + } + } + } + } + for(i = 0; i < l.outputs; ++i){ + l.output[i] = (l.output[i] + flip[i])/2.; + } +} +*/ + +int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +{ + int i,j,n; + float *predictions = l.output; + //if (l.batch == 2) avg_flipped_gaussian_yolo(l); + int count = 0; + for (i = 0; i < l.w*l.h; ++i){ + int row = i / l.w; + int col = i % l.w; + for(n = 0; n < l.n; ++n){ + int obj_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 8); + float objectness = predictions[obj_index]; + if(objectness <= thresh) continue; + int box_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_gaussian_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + + dets[count].uc[0] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 1)]; // tx uncertainty + dets[count].uc[1] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 3)]; // ty uncertainty + dets[count].uc[2] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 5)]; // tw uncertainty + dets[count].uc[3] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 7)]; // th uncertainty + + for(j = 0; j < l.classes; ++j){ + int class_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 9 + j); + float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3])/4.0; + float prob = objectness*predictions[class_index]*(1.0-uc_aver); + dets[count].prob[j] = (prob > thresh) ? prob : 0; + } + ++count; + } + } + correct_gaussian_yolo_boxes(dets, count, w, h, netw, neth, relative); + return count; +} + +#ifdef GPU + +void forward_gaussian_yolo_layer_gpu(const layer l, network net) +{ + copy_ongpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + int b, n; + for (b = 0; b < l.batch; ++b) + { + for(n = 0; n < l.n; ++n) + { + // x : mu, sigma + int index = entry_gaussian_index(l, b, n*l.w*l.h, 0); + activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + // y : mu, sigma + index = entry_gaussian_index(l, b, n*l.w*l.h, 2); + activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + // w : sigma + index = entry_gaussian_index(l, b, n*l.w*l.h, 5); + activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + // h : sigma + index = entry_gaussian_index(l, b, n*l.w*l.h, 7); + activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC); + // objectness & class + index = entry_gaussian_index(l, b, n*l.w*l.h, 8); + activate_array_ongpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); + } + } + if(!net.train || l.onlyforward){ + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + return; + } + + cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); + forward_gaussian_yolo_layer(l, net); + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); +} + +void backward_gaussian_yolo_layer_gpu(const layer l, network net) +{ + axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); +} +#endif diff --git a/src/gaussian_yolo_layer.h b/src/gaussian_yolo_layer.h new file mode 100644 index 00000000000..96cb2a8f4a6 --- /dev/null +++ b/src/gaussian_yolo_layer.h @@ -0,0 +1,20 @@ +//Gaussian YOLOv3 implementation +#ifndef GAUSSIAN_YOLO_LAYER_H +#define GAUSSIAN_YOLO_LAYER_H + +#include "darknet.h" +#include "layer.h" +#include "network.h" + +layer 
make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); +void forward_gaussian_yolo_layer(const layer l, network net); +void backward_gaussian_yolo_layer(const layer l, network net); +void resize_gaussian_yolo_layer(layer *l, int w, int h); +int gaussian_yolo_num_detections(layer l, float thresh); + +#ifdef GPU +void forward_gaussian_yolo_layer_gpu(const layer l, network net); +void backward_gaussian_yolo_layer_gpu(layer l, network net); +#endif + +#endif diff --git a/src/network.c b/src/network.c index 82dc4d53978..0658788044c 100644 --- a/src/network.c +++ b/src/network.c @@ -34,6 +34,7 @@ #include "shortcut_layer.h" #include "scale_channels_layer.h" #include "yolo_layer.h" +#include "gaussian_yolo_layer.h" #include "upsample_layer.h" #include "parser.h" @@ -202,6 +203,10 @@ char *get_layer_string(LAYER_TYPE a) return "detection"; case REGION: return "region"; + case YOLO: + return "yolo"; + case GAUSSIAN_YOLO: + return "Gaussian_yolo"; case DROPOUT: return "dropout"; case CROP: @@ -524,6 +529,8 @@ int resize_network(network *net, int w, int h) resize_region_layer(&l, w, h); }else if (l.type == YOLO) { resize_yolo_layer(&l, w, h); + }else if (l.type == GAUSSIAN_YOLO) { + resize_gaussian_yolo_layer(&l, w, h); }else if(l.type == ROUTE){ resize_route_layer(&l, net); }else if (l.type == SHORTCUT) { @@ -687,6 +694,9 @@ int num_detections(network *net, float thresh) if (l.type == YOLO) { s += yolo_num_detections(l, thresh); } + if (l.type == GAUSSIAN_YOLO) { + s += gaussian_yolo_num_detections(l, thresh); + } if (l.type == DETECTION || l.type == REGION) { s += l.w*l.h*l.n; } @@ -703,6 +713,8 @@ detection *make_network_boxes(network *net, float thresh, int *num) detection* dets = (detection*)calloc(nboxes, sizeof(detection)); for (i = 0; i < nboxes; ++i) { dets[i].prob = (float*)calloc(l.classes, sizeof(float)); + // tx,ty,tw,th uncertainty + dets[i].uc = calloc(4, sizeof(float)); // Gaussian_YOLOv3 if (l.coords > 4) { dets[i].mask = (float*)calloc(l.coords - 4, sizeof(float)); } @@ -749,6 +761,10 @@ void fill_network_boxes(network *net, int w, int h, float thresh, float hier, in prev_classes, l.classes); } } + if (l.type == GAUSSIAN_YOLO) { + int count = get_gaussian_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets); + dets += count; + } if (l.type == REGION) { custom_get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets, letter); //get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); diff --git a/src/parser.c b/src/parser.c index 829134d1131..b31c7673102 100644 --- a/src/parser.c +++ b/src/parser.c @@ -38,6 +38,7 @@ #include "upsample_layer.h" #include "version.h" #include "yolo_layer.h" +#include "gaussian_yolo_layer.h" typedef struct{ char *type; @@ -57,6 +58,7 @@ LAYER_TYPE string_to_layer_type(char * type) if (strcmp(type, "[detection]")==0) return DETECTION; if (strcmp(type, "[region]")==0) return REGION; if (strcmp(type, "[yolo]") == 0) return YOLO; + if (strcmp(type, "[Gaussian_yolo]") == 0) return GAUSSIAN_YOLO; if (strcmp(type, "[local]")==0) return LOCAL; if (strcmp(type, "[conv]")==0 || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; @@ -390,6 +392,67 @@ layer parse_yolo(list *options, size_params params) return l; } + +int *parse_gaussian_yolo_mask(char *a, int *num) // Gaussian_YOLOv3 +{ + int *mask = 0; + if (a) { + int len = strlen(a); + int n = 1; + int i; + for (i = 0; i < len; ++i) { + if (a[i] == ',') ++n; + } + mask = calloc(n, sizeof(int)); + for (i 
= 0; i < n; ++i) { + int val = atoi(a); + mask[i] = val; + a = strchr(a, ',') + 1; + } + *num = n; + } + return mask; +} + + +layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 +{ + int classes = option_find_int(options, "classes", 20); + int total = option_find_int(options, "num", 1); + int num = total; + + char *a = option_find_str(options, "mask", 0); + int *mask = parse_gaussian_yolo_mask(a, &num); + layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); + assert(l.outputs == params.inputs); + + l.max_boxes = option_find_int_quiet(options, "max", 90); + l.jitter = option_find_float(options, "jitter", .2); + + l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); + l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.random = option_find_int_quiet(options, "random", 0); + + char *map_file = option_find_str(options, "map", 0); + if (map_file) l.map = read_map(map_file); + + a = option_find_str(options, "anchors", 0); + if (a) { + int len = strlen(a); + int n = 1; + int i; + for (i = 0; i < len; ++i) { + if (a[i] == ',') ++n; + } + for (i = 0; i < n; ++i) { + float bias = atof(a); + l.biases[i] = bias; + a = strchr(a, ',') + 1; + } + } + return l; +} + layer parse_region(list *options, size_params params) { int coords = option_find_int(options, "coords", 4); @@ -923,6 +986,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) l = parse_region(options, params); }else if (lt == YOLO) { l = parse_yolo(options, params); + }else if (lt == GAUSSIAN_YOLO) { + l = parse_gaussian_yolo(options, params); }else if(lt == DETECTION){ l = parse_detection(options, params); }else if(lt == SOFTMAX){ diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 2006f4b8047..424811df6e6 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -242,16 +242,6 @@ static int entry_index(layer l, int batch, int location, int entry) return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc; } -static box float_to_box_stride(float *f, int stride) -{ - box b = { 0 }; - b.x = f[0]; - b.y = f[1 * stride]; - b.w = f[2 * stride]; - b.h = f[3 * stride]; - return b; -} - void forward_yolo_layer(const layer l, network_state state) { int i, j, b, t, n; From 24788b806175a9cfbbeb0e2057f0673cd0d3e657 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 25 Oct 2019 20:56:58 +0300 Subject: [PATCH 35/86] Compile fix --- src/image_opencv.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index cc50c0719a1..d4a69e6f197 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -1125,15 +1125,6 @@ void draw_train_loss(mat_cv* img_src, int img_size, float avg_loss, float max_im // ==================================================================== // Data augmentation // ==================================================================== -static box float_to_box_stride(float *f, int stride) -{ - box b = { 0 }; - b.x = f[0]; - b.y = f[1 * stride]; - b.w = f[2 * stride]; - b.h = f[3 * stride]; - return b; -} image image_data_augmentation(mat_cv* mat, int w, int h, int pleft, int ptop, int swidth, int sheight, int flip, From 72f6de30b2a75fd67436cff4638b8fa36e3fb205 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 25 Oct 2019 22:14:27 +0300 Subject: [PATCH 36/86] another compile fix --- src/gaussian_yolo_layer.c | 2 +- src/parser.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/gaussian_yolo_layer.c 
b/src/gaussian_yolo_layer.c index 320834018d9..74604e52346 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -23,7 +23,7 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) { int i; - layer l = {0}; + layer l = { (LAYER_TYPE)0 }; l.type = GAUSSIAN_YOLO; l.n = n; diff --git a/src/parser.c b/src/parser.c index b31c7673102..445771ca4aa 100644 --- a/src/parser.c +++ b/src/parser.c @@ -159,9 +159,15 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo int n = option_find_int(options, "filters",1); int groups = option_find_int_quiet(options, "groups", 1); int size = option_find_int(options, "size",1); - int stride = option_find_int(options, "stride",1); - int stride_x = option_find_int_quiet(options, "stride_x", stride); - int stride_y = option_find_int_quiet(options, "stride_y", stride); + int stride = -1; + //int stride = option_find_int(options, "stride",1); + int stride_x = option_find_int_quiet(options, "stride_x", -1); + int stride_y = option_find_int_quiet(options, "stride_y", -1); + if (stride_x < 1 || stride_y < 1) { + stride = option_find_int(options, "stride", 1); + if (stride_x < 1) stride_x = stride; + if (stride_y < 1) stride_y = stride; + } int dilation = option_find_int_quiet(options, "dilation", 1); int antialiasing = option_find_int_quiet(options, "antialiasing", 0); if (size == 1) dilation = 1; From f18338de2667a402cd78e61f50423f4953cd32cc Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 26 Oct 2019 01:29:41 +0300 Subject: [PATCH 37/86] Fixed [Gaussian_yolo] layer (tested for training and detection) --- src/gaussian_yolo_layer.c | 154 ++++++++++++++++++++++++++++---------- src/gaussian_yolo_layer.h | 12 +-- src/network.c | 5 +- src/parser.c | 3 +- 4 files changed, 127 insertions(+), 47 deletions(-) diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 74604e52346..ddb43939ce9 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -20,7 +20,7 @@ #define M_PI 3.141592 #endif -layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) +layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes) { int i; layer l = { (LAYER_TYPE)0 }; @@ -36,21 +36,22 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m l.out_h = l.h; l.out_c = l.c; l.classes = classes; - l.cost = calloc(1, sizeof(float)); - l.biases = calloc(total*2, sizeof(float)); + l.cost = (float*)calloc(1, sizeof(float)); + l.biases = (float*)calloc(total*2, sizeof(float)); if(mask) l.mask = mask; else{ - l.mask = calloc(n, sizeof(int)); + l.mask = (int*)calloc(n, sizeof(int)); for(i = 0; i < n; ++i){ l.mask[i] = i; } } - l.bias_updates = calloc(n*2, sizeof(float)); + l.bias_updates = (float*)calloc(n*2, sizeof(float)); l.outputs = h*w*n*(classes + 8 + 1); l.inputs = l.outputs; - l.truths = 90*(4 + 1); - l.delta = calloc(batch*l.outputs, sizeof(float)); - l.output = calloc(batch*l.outputs, sizeof(float)); + l.max_boxes = max_boxes; + l.truths = l.max_boxes*(4 + 1); + l.delta = (float*)calloc(batch*l.outputs, sizeof(float)); + l.output = (float*)calloc(batch*l.outputs, sizeof(float)); for(i = 0; i < total*2; ++i){ l.biases[i] = .5; } @@ -62,10 +63,26 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m l.backward_gpu = backward_gaussian_yolo_layer_gpu; l.output_gpu = cuda_make_array(l.output, batch*l.outputs); l.delta_gpu = 
cuda_make_array(l.delta, batch*l.outputs); + + /* + free(l.output); + if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs * sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1; + else { + cudaGetLastError(); // reset CUDA-error + l.output = (float*)calloc(batch * l.outputs, sizeof(float)); + } + + free(l.delta); + if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs * sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1; + else { + cudaGetLastError(); // reset CUDA-error + l.delta = (float*)calloc(batch * l.outputs, sizeof(float)); + } + */ #endif fprintf(stderr, "Gaussian_yolo\n"); - srand(0); + srand(time(0)); return l; } @@ -78,10 +95,33 @@ void resize_gaussian_yolo_layer(layer *l, int w, int h) l->outputs = h*w*l->n*(l->classes + 8 + 1); l->inputs = l->outputs; - l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); - l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); + l->output = realloc(l->output, l->batch*l->outputs * sizeof(float)); + l->delta = realloc(l->delta, l->batch*l->outputs * sizeof(float)); + + //if (!l->output_pinned) l->output = (float*)realloc(l->output, l->batch*l->outputs * sizeof(float)); + //if (!l->delta_pinned) l->delta = (float*)realloc(l->delta, l->batch*l->outputs * sizeof(float)); #ifdef GPU + /* + if (l->output_pinned) { + cudaFreeHost(l->output); + if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { + cudaGetLastError(); // reset CUDA-error + l->output = (float*)realloc(l->output, l->batch * l->outputs * sizeof(float)); + l->output_pinned = 0; + } + } + + if (l->delta_pinned) { + cudaFreeHost(l->delta); + if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { + cudaGetLastError(); // reset CUDA-error + l->delta = (float*)realloc(l->delta, l->batch * l->outputs * sizeof(float)); + l->delta_pinned = 0; + } + } + */ + cuda_free(l->delta_gpu); cuda_free(l->output_gpu); @@ -168,10 +208,10 @@ static int entry_gaussian_index(layer l, int batch, int location, int entry) return batch*l.outputs + n*l.w*l.h*(8+l.classes+1) + entry*l.w*l.h + loc; } -void forward_gaussian_yolo_layer(const layer l, network net) +void forward_gaussian_yolo_layer(const layer l, network_state state) { int i,j,b,t,n; - memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float)); + memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); #ifndef GPU for (b = 0; b < l.batch; ++b){ @@ -196,7 +236,7 @@ void forward_gaussian_yolo_layer(const layer l, network net) #endif memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); - if(!net.train) return; + if (!state.train) return; float avg_iou = 0; float recall = 0; float recall75 = 0; @@ -211,11 +251,11 @@ void forward_gaussian_yolo_layer(const layer l, network net) for (i = 0; i < l.w; ++i) { for (n = 0; n < l.n; ++n) { int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); - box pred = get_gaussian_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h); + box pred = get_gaussian_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h); float best_iou = 0; int best_t = 0; for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box_stride(net.truth + t*(4 + 1) + b*l.truths, 1); + box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); if(!truth.x) break; float iou = box_iou(pred, truth); if (iou > best_iou) { @@ -232,18 +272,18 @@ void 
forward_gaussian_yolo_layer(const layer l, network net) if (best_iou > l.truth_thresh) { l.delta[obj_index] = 1 - l.output[obj_index]; - int class = net.truth[best_t*(4 + 1) + b*l.truths + 4]; + int class = state.truth[best_t*(4 + 1) + b*l.truths + 4]; if (l.map) class = l.map[class]; int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); delta_gaussian_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); - box truth = float_to_box_stride(net.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); + delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); } } } } for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box_stride(net.truth + t*(4 + 1) + b*l.truths, 1); + box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); if(!truth.x) break; float best_iou = 0; @@ -254,8 +294,8 @@ void forward_gaussian_yolo_layer(const layer l, network net) truth_shift.x = truth_shift.y = 0; for(n = 0; n < l.total; ++n){ box pred = {0}; - pred.w = l.biases[2*n]/net.w; - pred.h = l.biases[2*n+1]/net.h; + pred.w = l.biases[2*n]/ state.net.w; + pred.h = l.biases[2*n+1]/ state.net.h; float iou = box_iou(pred, truth_shift); if (iou > best_iou){ best_iou = iou; @@ -266,13 +306,13 @@ void forward_gaussian_yolo_layer(const layer l, network net) int mask_n = int_index(l.mask, best_n, l.n); if(mask_n >= 0){ int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; l.delta[obj_index] = 1 - l.output[obj_index]; - int class = net.truth[t*(4 + 1) + b*l.truths + 4]; + int class = state.truth[t*(4 + 1) + b*l.truths + 4]; if (l.map) class = l.map[class]; int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); delta_gaussian_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); @@ -286,19 +326,34 @@ void forward_gaussian_yolo_layer(const layer l, network net) } } *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); - printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); } -void backward_gaussian_yolo_layer(const layer l, network net) +void backward_gaussian_yolo_layer(const layer l, network_state state) { - axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1); + axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); } -void correct_gaussian_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +void 
correct_gaussian_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter) { int i; int new_w=0; int new_h=0; + if (letter) { + if (((float)netw / w) < ((float)neth / h)) { + new_w = netw; + new_h = (h * netw) / w; + } + else { + new_h = neth; + new_w = (w * neth) / h; + } + } + else { + new_w = netw; + new_h = neth; + } + /* if (((float)netw/w) < ((float)neth/h)) { new_w = netw; new_h = (h * netw)/w; @@ -306,6 +361,7 @@ void correct_gaussian_yolo_boxes(detection *dets, int n, int w, int h, int netw, new_h = neth; new_w = (w * neth)/h; } + */ for (i = 0; i < n; ++i){ box b = dets[i].bbox; b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); @@ -365,7 +421,7 @@ void avg_flipped_gaussian_yolo(layer l) } */ -int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets) +int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter) { int i,j,n; float *predictions = l.output; @@ -390,22 +446,22 @@ int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, floa for(j = 0; j < l.classes; ++j){ int class_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 9 + j); - float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3])/4.0; + float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3]) / 4.0; float prob = objectness*predictions[class_index]*(1.0-uc_aver); dets[count].prob[j] = (prob > thresh) ? prob : 0; } ++count; } } - correct_gaussian_yolo_boxes(dets, count, w, h, netw, neth, relative); + correct_gaussian_yolo_boxes(dets, count, w, h, netw, neth, relative, letter); return count; } #ifdef GPU -void forward_gaussian_yolo_layer_gpu(const layer l, network net) +void forward_gaussian_yolo_layer_gpu(const layer l, network_state state) { - copy_ongpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1); + copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1); int b, n; for (b = 0; b < l.batch; ++b) { @@ -428,18 +484,38 @@ void forward_gaussian_yolo_layer_gpu(const layer l, network net) activate_array_ongpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); } } - if(!net.train || l.onlyforward){ - cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + + if (!state.train || l.onlyforward) { + //cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + cuda_pull_array_async(l.output_gpu, l.output, l.batch*l.outputs); + CHECK_CUDA(cudaPeekAtLastError()); return; } - cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs); - forward_gaussian_yolo_layer(l, net); + float *in_cpu = (float *)calloc(l.batch*l.inputs, sizeof(float)); + cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); + memcpy(in_cpu, l.output, l.batch*l.outputs * sizeof(float)); + float *truth_cpu = 0; + if (state.truth) { + int num_truth = l.batch*l.truths; + truth_cpu = (float *)calloc(num_truth, sizeof(float)); + cuda_pull_array(state.truth, truth_cpu, num_truth); + } + network_state cpu_state = state; + cpu_state.net = state.net; + cpu_state.index = state.index; + cpu_state.train = state.train; + cpu_state.truth = truth_cpu; + cpu_state.input = in_cpu; + forward_gaussian_yolo_layer(l, cpu_state); + //forward_yolo_layer(l, state); cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); + free(in_cpu); + if (cpu_state.truth) free(cpu_state.truth); } -void backward_gaussian_yolo_layer_gpu(const layer l, network net) +void 
backward_gaussian_yolo_layer_gpu(const layer l, network_state state) { - axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); + axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1); } #endif diff --git a/src/gaussian_yolo_layer.h b/src/gaussian_yolo_layer.h index 96cb2a8f4a6..9080881dc68 100644 --- a/src/gaussian_yolo_layer.h +++ b/src/gaussian_yolo_layer.h @@ -6,15 +6,17 @@ #include "layer.h" #include "network.h" -layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); -void forward_gaussian_yolo_layer(const layer l, network net); -void backward_gaussian_yolo_layer(const layer l, network net); +layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes); +void forward_gaussian_yolo_layer(const layer l, network_state state); +void backward_gaussian_yolo_layer(const layer l, network_state state); void resize_gaussian_yolo_layer(layer *l, int w, int h); int gaussian_yolo_num_detections(layer l, float thresh); +int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter); +void correct_gaussian_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter); #ifdef GPU -void forward_gaussian_yolo_layer_gpu(const layer l, network net); -void backward_gaussian_yolo_layer_gpu(layer l, network net); +void forward_gaussian_yolo_layer_gpu(const layer l, network_state state); +void backward_gaussian_yolo_layer_gpu(layer l, network_state state); #endif #endif diff --git a/src/network.c b/src/network.c index 0658788044c..cfe994343e8 100644 --- a/src/network.c +++ b/src/network.c @@ -714,7 +714,7 @@ detection *make_network_boxes(network *net, float thresh, int *num) for (i = 0; i < nboxes; ++i) { dets[i].prob = (float*)calloc(l.classes, sizeof(float)); // tx,ty,tw,th uncertainty - dets[i].uc = calloc(4, sizeof(float)); // Gaussian_YOLOv3 + dets[i].uc = (float*)calloc(4, sizeof(float)); // Gaussian_YOLOv3 if (l.coords > 4) { dets[i].mask = (float*)calloc(l.coords - 4, sizeof(float)); } @@ -762,7 +762,7 @@ void fill_network_boxes(network *net, int w, int h, float thresh, float hier, in } } if (l.type == GAUSSIAN_YOLO) { - int count = get_gaussian_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets); + int count = get_gaussian_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets, letter); dets += count; } if (l.type == REGION) { @@ -789,6 +789,7 @@ void free_detections(detection *dets, int n) int i; for (i = 0; i < n; ++i) { free(dets[i].prob); + if (dets[i].uc) free(dets[i].uc); if (dets[i].mask) free(dets[i].mask); } free(dets); diff --git a/src/parser.c b/src/parser.c index 445771ca4aa..1367d35b06a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -424,12 +424,13 @@ int *parse_gaussian_yolo_mask(char *a, int *num) // Gaussian_YOLOv3 layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 { int classes = option_find_int(options, "classes", 20); + int max_boxes = option_find_int_quiet(options, "max", 90); int total = option_find_int(options, "num", 1); int num = total; char *a = option_find_str(options, "mask", 0); int *mask = parse_gaussian_yolo_mask(a, &num); - layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); + layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); assert(l.outputs == params.inputs); l.max_boxes = 
option_find_int_quiet(options, "max", 90); From 29c71a190acb82aa4beda8762e087b658f4b0347 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 26 Oct 2019 01:30:11 +0300 Subject: [PATCH 38/86] Fixed antialiasing=1 for [convolutional] layer --- src/convolutional_kernels.cu | 4 +++- src/maxpool_layer_kernels.cu | 4 +++- src/yolo_layer.c | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 23005ccb91e..cb5a1b99bae 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -628,11 +628,13 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state s.train = state.train; s.workspace = state.workspace; s.net = state.net; - s.delta = l.delta_gpu; + s.delta = l.delta_gpu; // s.delta will be returned to l.delta_gpu s.input = l.input_antialiasing_gpu; //if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.delta_gpu, l.input_layer->delta_gpu); backward_convolutional_layer_gpu(*(l.input_layer), s); + + simple_copy_ongpu(l.outputs*l.batch, l.input_antialiasing_gpu, l.output_gpu); } if(state.net.try_fix_nan) constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index cc546a0b50c..8677b4d713d 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -199,11 +199,13 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st s.train = state.train; s.workspace = state.workspace; s.net = state.net; - s.delta = layer.delta_gpu; + s.delta = layer.delta_gpu; // s.delta will be returned to l.delta_gpu s.input = layer.input_antialiasing_gpu; //if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) simple_copy_ongpu(layer.input_layer->outputs*layer.input_layer->batch, layer.delta_gpu, layer.input_layer->delta_gpu); backward_convolutional_layer_gpu(*(layer.input_layer), s); + + //simple_copy_ongpu(layer.outputs*layer.batch, layer.input_antialiasing_gpu, layer.output_gpu); } if (layer.maxpool_depth) { diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 424811df6e6..906ed427f01 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -423,7 +423,8 @@ void forward_yolo_layer(const layer l, network_state state) } *(l.cost) = avg_iou_loss + classification_loss; } - printf("v3 (%s loss, Normalizer: (iou: %f, cls: %f) Region %d Avg (IOU: %f, GIOU: %f), Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? "giou" : "iou")), l.iou_normalizer, l.cls_normalizer, state.index, tot_iou / count, tot_giou / count, avg_cat / class_count, avg_obj / count, avg_anyobj / (l.w*l.h*l.n*l.batch), recall / count, recall75 / count, count); + printf("v3 (%s loss, Normalizer: (iou: %f, cls: %f) Region %d Avg (IOU: %f, GIOU: %f), Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", + (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? 
"giou" : "iou")), l.iou_normalizer, l.cls_normalizer, state.index, tot_iou / count, tot_giou / count, avg_cat / class_count, avg_obj / count, avg_anyobj / (l.w*l.h*l.n*l.batch), recall / count, recall75 / count, count); } void backward_yolo_layer(const layer l, network_state state) From fa7687e6b5936532ce35a607509fa5736e8f3c0d Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 26 Oct 2019 01:40:59 +0300 Subject: [PATCH 39/86] cfg-param blur > 1 is fixed (bilateral filter removes textures) --- src/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data.c b/src/data.c index c0af1ab857d..6c9e565db89 100644 --- a/src/data.c +++ b/src/data.c @@ -922,7 +922,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo int min_w_h = fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. / sy, w, h); - if (min_w_h < blur*4) blur = 0; // disable blur if one of the objects is too small + if (min_w_h/4 < blur) blur = min_w_h / 4; // disable blur if one of the objects is too small image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp, blur, boxes, d.y.vals[i]); From f8c72acd42d73d0ee200d92b3d605f50e0928f5a Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 26 Oct 2019 14:47:00 +0300 Subject: [PATCH 40/86] Added scale_x_y to [Gaussian_yolo]. Fixed blur=10; --- src/data.c | 8 +++-- src/gaussian_yolo_layer.c | 67 +++++++++++++++++++++------------------ src/image_opencv.cpp | 5 ++- src/parser.c | 1 + 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/src/data.c b/src/data.c index 6c9e565db89..1b9117d93cf 100644 --- a/src/data.c +++ b/src/data.c @@ -875,7 +875,11 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo dexp = rand_scale(exposure); flip = use_flip ? random_gen() % 2 : 0; - blur = rand_int(0, 1) ? (use_blur) : 0; + + //blur = rand_int(0, 1) ? (use_blur) : 0; + int tmp_blur = rand_int(0, 2); // 0 - disable, 1 - blur background, 2 - blur the whole image + if (tmp_blur == 2) blur = use_blur; + else blur = tmp_blur; } int pleft = rand_precalc_random(-dw, dw, r1); @@ -922,7 +926,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo int min_w_h = fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. 
/ sy, w, h); - if (min_w_h/4 < blur) blur = min_w_h / 4; // disable blur if one of the objects is too small + if (min_w_h/4 < blur && blur > 1) blur = min_w_h / 4; // disable blur if one of the objects is too small image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp, blur, boxes, d.y.vals[i]); diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index ddb43939ce9..3b58cc5a404 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -95,8 +95,8 @@ void resize_gaussian_yolo_layer(layer *l, int w, int h) l->outputs = h*w*l->n*(l->classes + 8 + 1); l->inputs = l->outputs; - l->output = realloc(l->output, l->batch*l->outputs * sizeof(float)); - l->delta = realloc(l->delta, l->batch*l->outputs * sizeof(float)); + l->output = (float *)realloc(l->output, l->batch*l->outputs * sizeof(float)); + l->delta = (float *)realloc(l->delta, l->batch*l->outputs * sizeof(float)); //if (!l->output_pinned) l->output = (float*)realloc(l->output, l->batch*l->outputs * sizeof(float)); //if (!l->delta_pinned) l->delta = (float*)realloc(l->delta, l->batch*l->outputs * sizeof(float)); @@ -187,17 +187,17 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind } -void delta_gaussian_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +void delta_gaussian_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat) { int n; if (delta[index]){ - delta[index + stride*class] = 1 - output[index + stride*class]; - if(avg_cat) *avg_cat += output[index + stride*class]; + delta[index + stride*class_id] = 1 - output[index + stride*class_id]; + if(avg_cat) *avg_cat += output[index + stride*class_id]; return; } for(n = 0; n < classes; ++n){ - delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; - if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + delta[index + stride*n] = ((n == class_id)?1 : 0) - output[index + stride*n]; + if(n == class_id && avg_cat) *avg_cat += output[index + stride*n]; } } @@ -219,9 +219,11 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) // x : mu, sigma int index = entry_gaussian_index(l, b, n*l.w*l.h, 0); activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + scal_add_cpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale x // y : mu, sigma index = entry_gaussian_index(l, b, n*l.w*l.h, 2); activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + scal_add_cpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale y // w : sigma index = entry_gaussian_index(l, b, n*l.w*l.h, 5); activate_array(l.output + index, l.w*l.h, LOGISTIC); @@ -272,10 +274,10 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if (best_iou > l.truth_thresh) { l.delta[obj_index] = 1 - l.output[obj_index]; - int class = state.truth[best_t*(4 + 1) + b*l.truths + 4]; - if (l.map) class = l.map[class]; + int class_id = state.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); - delta_gaussian_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, 
l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); } @@ -312,10 +314,10 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) avg_obj += l.output[obj_index]; l.delta[obj_index] = 1 - l.output[obj_index]; - int class = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (l.map) class = l.map[class]; + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); - delta_gaussian_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); + delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat); ++count; ++class_count; @@ -433,24 +435,27 @@ int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, floa for(n = 0; n < l.n; ++n){ int obj_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 8); float objectness = predictions[obj_index]; - if(objectness <= thresh) continue; - int box_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 0); - dets[count].bbox = get_gaussian_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); - dets[count].objectness = objectness; - dets[count].classes = l.classes; - - dets[count].uc[0] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 1)]; // tx uncertainty - dets[count].uc[1] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 3)]; // ty uncertainty - dets[count].uc[2] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 5)]; // tw uncertainty - dets[count].uc[3] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 7)]; // th uncertainty - - for(j = 0; j < l.classes; ++j){ - int class_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 9 + j); - float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3]) / 4.0; - float prob = objectness*predictions[class_index]*(1.0-uc_aver); - dets[count].prob[j] = (prob > thresh) ? prob : 0; + if (objectness <= thresh) continue; // incorrect behavior for Nan values + + if (objectness > thresh) { + int box_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_gaussian_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + + dets[count].uc[0] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 1)]; // tx uncertainty + dets[count].uc[1] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 3)]; // ty uncertainty + dets[count].uc[2] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 5)]; // tw uncertainty + dets[count].uc[3] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 7)]; // th uncertainty + + for (j = 0; j < l.classes; ++j) { + int class_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 9 + j); + float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3]) / 4.0; + float prob = objectness*predictions[class_index] * (1.0 - uc_aver); + dets[count].prob[j] = (prob > thresh) ? 
prob : 0; + } + ++count; } - ++count; } } correct_gaussian_yolo_boxes(dets, count, w, h, netw, neth, relative, letter); @@ -470,9 +475,11 @@ void forward_gaussian_yolo_layer_gpu(const layer l, network_state state) // x : mu, sigma int index = entry_gaussian_index(l, b, n*l.w*l.h, 0); activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + scal_add_ongpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale x // y : mu, sigma index = entry_gaussian_index(l, b, n*l.w*l.h, 2); activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + scal_add_ongpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale y // w : sigma index = entry_gaussian_index(l, b, n*l.w*l.h, 5); activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC); diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index d4a69e6f197..a9d3b560cd8 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -1196,7 +1196,10 @@ image image_data_augmentation(mat_cv* mat, int w, int h, if (blur) { cv::Mat dst(sized.size(), sized.type()); - if(blur == 1) cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0); + if (blur == 1) { + //cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0); + cv::bilateralFilter(sized, dst, 31, 75, 75); + } else { int ksize = (blur / 2) * 2 + 1; cv::Size kernel_size = cv::Size(ksize, ksize); diff --git a/src/parser.c b/src/parser.c index 1367d35b06a..e0962f02edc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -433,6 +433,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); assert(l.outputs == params.inputs); + l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); l.max_boxes = option_find_int_quiet(options, "max", 90); l.jitter = option_find_float(options, "jitter", .2); From 69b7a19f5bfce760debf2057b71b0882c535f350 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sun, 27 Oct 2019 02:45:31 +0300 Subject: [PATCH 41/86] minor fix --- src/data.c | 3 ++- src/parser.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/data.c b/src/data.c index 1b9117d93cf..2780a82c85b 100644 --- a/src/data.c +++ b/src/data.c @@ -841,7 +841,8 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo d.y = make_matrix(n, 5*boxes); int i_mixup = 0; for (i_mixup = 0; i_mixup <= mixup; i_mixup++) { - if (i_mixup) augmentation_calculated = 0; + if (i_mixup) augmentation_calculated = 0; // recalculate augmentation for the 2nd sequence if(track==1) + for (i = 0; i < n; ++i) { float *truth = (float*)calloc(5 * boxes, sizeof(float)); const char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i]; diff --git a/src/parser.c b/src/parser.c index e0962f02edc..f425560f04a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -409,7 +409,7 @@ int *parse_gaussian_yolo_mask(char *a, int *num) // Gaussian_YOLOv3 for (i = 0; i < len; ++i) { if (a[i] == ',') ++n; } - mask = calloc(n, sizeof(int)); + mask = (int *)calloc(n, sizeof(int)); for (i = 0; i < n; ++i) { int val = atoi(a); mask[i] = val; From 6e736339259142c87bd86fd7362eac4e6043a8cc Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 29 Oct 2019 14:53:57 +0300 Subject: [PATCH 42/86] Fixed Blur and Try to use new Assisted Excitation. 
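
Notes on the schedule used below: alpha decays from 1 to 0 along a
half-cosine over the window [burn_in, assisted_excitation], and the excited
region is no longer the bare ground-truth box - it is inflated by
beta = 1 - alpha, so it starts at the box and grows toward the whole image,
while assisted_activation2_gpu() is now applied with constant strength 1.
A minimal standalone C sketch of that schedule (illustrative names only,
not code from this patch):

    #include <math.h>

    /* it = current iteration, ae = cfg value assisted_excitation (> 1) */
    static void ae_box_inflation(int it, int burn_in, int ae,
                                 float truth_w, float truth_h,
                                 float *dw, float *dh)
    {
        float alpha = (1 + cosf(3.141592f * it / (float)(burn_in + ae))) / 2; /* 1 -> 0 */
        float beta = 1 - alpha;                                               /* 0 -> 1 */
        *dw = (1 - truth_w) * beta;  /* extra size: the excited area grows */
        *dh = (1 - truth_h) * beta;  /* from the gt box toward the image   */
    }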
--- src/convolutional_kernels.cu | 33 +++++++++++++++++++++++---------- src/data.c | 2 +- src/image_opencv.cpp | 2 +- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index cb5a1b99bae..0b94dd29db4 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -978,10 +978,14 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2; //float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)); - if (l.assisted_excitation > 1) { - if (iteration_num < state.net.burn_in) alpha = 0; - else if (iteration_num > l.assisted_excitation) alpha = 0; - else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation)) / 2; + if (l.assisted_excitation == 1) { + if (iteration_num > state.net.max_batches / 2) return; + } + else { + if (iteration_num < state.net.burn_in) return; + else if (iteration_num > l.assisted_excitation) return; + else + alpha = (1 + cos(3.141592 * iteration_num / (state.net.burn_in + l.assisted_excitation))) / 2; // from 1 to 0 } //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n", @@ -1011,11 +1015,19 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) for (t = 0; t < state.net.num_boxes; ++t) { box truth = float_to_box_stride(truth_cpu + t*(4 + 1) + b*l.truths, 1); if (!truth.x) break; // continue; - - int left = floor((truth.x - truth.w / 2) * l.out_w); - int right = ceil((truth.x + truth.w / 2) * l.out_w); - int top = floor((truth.y - truth.h / 2) * l.out_h); - int bottom = ceil((truth.y + truth.h / 2) * l.out_h); + float beta = 1 - alpha; // from 0 to 1 + float dw = (1 - truth.w) * beta; + float dh = (1 - truth.h) * beta; + //printf(" alpha = %f, beta = %f, truth.w = %f, dw = %f, tw+dw = %f, l.out_w = %d \n", alpha, beta, truth.w, dw, truth.w+dw, l.out_w); + + int left = floor((truth.x - (dw + truth.w) / 2) * l.out_w); + int right = ceil((truth.x + (dw + truth.w) / 2) * l.out_w); + int top = floor((truth.y - (dh + truth.h) / 2) * l.out_h); + int bottom = ceil((truth.y + (dh + truth.h) / 2) * l.out_h); + if (left < 0) left = 0; + if (top < 0) top = 0; + if (right > l.out_w) right = l.out_w; + if (bottom > l.out_h) bottom = l.out_h; for (w = left; w <= right; w++) { for (h = top; h < bottom; h++) { @@ -1035,7 +1047,8 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) //CHECK_CUDA(cudaPeekAtLastError()); // calc new output - assisted_activation2_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); + assisted_activation2_gpu(1, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); // AE3: gt increases (beta = 1 - alpha = 0) + //assisted_activation2_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); //assisted_activation_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); //cudaStreamSynchronize(get_cuda_stream()); //CHECK_CUDA(cudaPeekAtLastError()); diff --git a/src/data.c b/src/data.c index 2780a82c85b..622e401ef75 100644 --- a/src/data.c +++ b/src/data.c @@ -927,7 +927,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo int min_w_h = fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. 
/ sy, w, h);
 
-        if (min_w_h/4 < blur && blur > 1) blur = min_w_h / 4; // disable blur if one of the objects is too small
+        if (min_w_h / 8 < blur && blur > 1) blur = min_w_h / 8; // disable blur if one of the objects is too small
 
         image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp,
             blur, boxes, d.y.vals[i]);
diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp
index a9d3b560cd8..912a0b1b6d2 100644
--- a/src/image_opencv.cpp
+++ b/src/image_opencv.cpp
@@ -1198,7 +1198,7 @@ image image_data_augmentation(mat_cv* mat, int w, int h,
         cv::Mat dst(sized.size(), sized.type());
         if (blur == 1) {
             //cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0);
-            cv::bilateralFilter(sized, dst, 31, 75, 75);
+            cv::bilateralFilter(sized, dst, 17, 75, 75);
         }
         else {
             int ksize = (blur / 2) * 2 + 1;

From 4d9addedd719cfb5d68cb3b39d5c7f9193071c0d Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Tue, 29 Oct 2019 14:55:12 +0300
Subject: [PATCH 43/86] Fixed legacy compilation using MSVS (gaussian_yolo_layer)

---
 build/darknet/darknet_no_gpu.vcxproj      | 2 ++
 build/darknet/yolo_cpp_dll.vcxproj        | 2 ++
 build/darknet/yolo_cpp_dll_no_gpu.vcxproj | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/build/darknet/darknet_no_gpu.vcxproj b/build/darknet/darknet_no_gpu.vcxproj
index dcfce05d057..72d23af3d09 100644
--- a/build/darknet/darknet_no_gpu.vcxproj
+++ b/build/darknet/darknet_no_gpu.vcxproj
@@ -203,6 +203,7 @@
+    <ClCompile Include="..\..\src\gaussian_yolo_layer.c" />
@@ -267,6 +268,7 @@
+    <ClInclude Include="..\..\src\gaussian_yolo_layer.h" />
diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj
index d3f60a0260f..813cae31e9d 100644
--- a/build/darknet/yolo_cpp_dll.vcxproj
+++ b/build/darknet/yolo_cpp_dll.vcxproj
@@ -201,6 +201,7 @@
+    <ClCompile Include="..\..\src\gaussian_yolo_layer.c" />
@@ -267,6 +268,7 @@
+    <ClInclude Include="..\..\src\gaussian_yolo_layer.h" />
diff --git a/build/darknet/yolo_cpp_dll_no_gpu.vcxproj b/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
index f719d3f28b7..c9f3ddfa29c 100644
--- a/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
+++ b/build/darknet/yolo_cpp_dll_no_gpu.vcxproj
@@ -187,6 +187,7 @@
+    <ClCompile Include="..\..\src\gaussian_yolo_layer.c" />
@@ -253,6 +254,7 @@
+    <ClInclude Include="..\..\src\gaussian_yolo_layer.h" />

From 52e3bb252fddadf73957000303fa1a2ca247e306 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Mon, 4 Nov 2019 14:39:20 +0300
Subject: [PATCH 44/86] Use non-blocking sockets for JSON_server and MJPEG_server

---
 include/yolo_v2_class.hpp |  1 +
 src/http_stream.cpp       | 28 ++++++++++++++++++++++++++++
 src/parser.c              |  3 +++
 src/yolo_v2_class.cpp     |  9 ++++++---
 4 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp
index f25610d0424..5543df7f8fc 100644
--- a/include/yolo_v2_class.hpp
+++ b/include/yolo_v2_class.hpp
@@ -70,6 +70,7 @@ extern "C" LIB_API void send_json_custom(char const* send_buf, int port, int tim
 class Detector {
     std::shared_ptr<void> detector_gpu_ptr;
     std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
+    std::string _cfg_filename, _weight_filename;
 public:
     const int cur_gpu_id;
     float nms = .4;
diff --git a/src/http_stream.cpp b/src/http_stream.cpp
index af2e6730c74..927a65bcf35 100644
--- a/src/http_stream.cpp
+++ b/src/http_stream.cpp
@@ -148,6 +148,20 @@ class JSON_sender
         if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof(reuse)) < 0)
             cerr << "setsockopt(SO_REUSEADDR) failed" << endl;
 
+        // Non-blocking sockets
+        // Windows: ioctlsocket() and FIONBIO
+        // Linux: fcntl() and O_NONBLOCK
+#ifdef WIN32
+        unsigned long i_mode = 1;
+        int result = ioctlsocket(sock, FIONBIO, &i_mode);
+        if (result != NO_ERROR) {
+            std::cerr << "ioctlsocket(FIONBIO) failed with error: " << result << std::endl;
+        }
+#else // WIN32
+        int flags = fcntl(sock, F_GETFL, 0);
+        fcntl(sock, F_SETFL, flags | O_NONBLOCK);
+#endif // WIN32
+
 #ifdef SO_REUSEPORT
         if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (const char*)&reuse, sizeof(reuse)) < 0)
             cerr << "setsockopt(SO_REUSEPORT) failed" << endl;
@@ -375,6 +389,20 @@ class MJPG_sender
         if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof(reuse)) < 0)
             cerr << "setsockopt(SO_REUSEADDR) failed" << endl;
 
+        // Non-blocking sockets
+        // Windows: ioctlsocket() and FIONBIO
+        // Linux: fcntl() and O_NONBLOCK
+#ifdef WIN32
+        unsigned long i_mode = 1;
+        int result = ioctlsocket(sock, FIONBIO, &i_mode);
+        if (result != NO_ERROR) {
+            std::cerr << "ioctlsocket(FIONBIO) failed with error: " << result << std::endl;
+        }
+#else // WIN32
+        int flags = fcntl(sock, F_GETFL, 0);
+        fcntl(sock, F_SETFL, flags | O_NONBLOCK);
+#endif // WIN32
+
 #ifdef SO_REUSEPORT
         if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (const char*)&reuse, sizeof(reuse)) < 0)
             cerr << "setsockopt(SO_REUSEPORT) failed" << endl;
diff --git a/src/parser.c b/src/parser.c
index f425560f04a..2cf295f070e 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -168,6 +168,9 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo
         if (stride_x < 1) stride_x = stride;
         if (stride_y < 1) stride_y = stride;
     }
+    else {
+        stride = option_find_int_quiet(options, "stride", 1);
+    }
     int dilation = option_find_int_quiet(options, "dilation", 1);
     int antialiasing = option_find_int_quiet(options, "antialiasing", 0);
     if (size == 1) dilation = 1;
diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp
index 4667794a839..e01dadbb306 100644
--- a/src/yolo_v2_class.cpp
+++ b/src/yolo_v2_class.cpp
@@ -147,8 +147,11 @@ LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename
     net.gpu_index = cur_gpu_id;
     //gpu_index = i;
 
-    char *cfgfile = const_cast<char *>(cfg_filename.data());
-    char *weightfile = const_cast<char *>(weight_filename.data());
+    _cfg_filename = cfg_filename;
+    _weight_filename = weight_filename;
+
+    char *cfgfile = const_cast<char *>(_cfg_filename.c_str());
+    char *weightfile = const_cast<char *>(_weight_filename.c_str());
 
     net = parse_network_cfg_custom(cfgfile, 1, 0);
     if (weightfile) {
@@ -243,7 +246,7 @@ static image load_image_stb(char *filename, int channels)
 
 LIB_API image_t Detector::load_image(std::string image_filename)
 {
-    char *input = const_cast<char *>(image_filename.data());
+    char *input = const_cast<char *>(image_filename.c_str());
     image im = load_image_stb(input, 3);
 
     image_t img;

From 7018be435f06a9ac26c5b7494ac96068dbdf497a Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Mon, 4 Nov 2019 14:45:34 +0300
Subject: [PATCH 45/86] Compile fix

---
 src/http_stream.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/http_stream.cpp b/src/http_stream.cpp
index 927a65bcf35..781139b3dff 100644
--- a/src/http_stream.cpp
+++ b/src/http_stream.cpp
@@ -50,6 +50,7 @@ static int close_socket(SOCKET s) {
 }
 #else // _WIN32 - else: nix
 #include "darkunistd.h"
+#include <fcntl.h>
 #include 
 #include 
 #include 

From e345b8793c4dd8c28e0854145ac15b912b771bc6 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Tue, 5 Nov 2019 16:06:24 +0300
Subject: [PATCH 46/86] Fixed ./darknet partial for Anti-aliasing

---
 src/convolutional_layer.c | 19 ++++---------------
 src/maxpool_layer.c       | 19 ++++---------------
 src/parser.c              |  2 ++
 3 files changed, 10 insertions(+), 30 deletions(-)

diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 6818b603529..bf5beac7972 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -607,19 +607,6 @@ convolutional_layer make_convolutional_layer(int batch, int
steps, int h, int w, } else { for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { - /* - l.input_layer->weights[i + 0] = 0; - l.input_layer->weights[i + 1] = 0; - l.input_layer->weights[i + 2] = 0; - - l.input_layer->weights[i + 3] = 0; - l.input_layer->weights[i + 4] = 1; - l.input_layer->weights[i + 5] = 0; - - l.input_layer->weights[i + 6] = 0; - l.input_layer->weights[i + 7] = 0; - l.input_layer->weights[i + 8] = 0; - */ l.input_layer->weights[i + 0] = 1 / 16.f; l.input_layer->weights[i + 1] = 2 / 16.f; l.input_layer->weights[i + 2] = 1 / 16.f; @@ -635,8 +622,10 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, } for (i = 0; i < n; ++i) l.input_layer->biases[i] = 0; #ifdef GPU - l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs); - push_convolutional_layer(*(l.input_layer)); + if (gpu_index >= 0) { + l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs); + push_convolutional_layer(*(l.input_layer)); + } #endif // GPU } diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index dca9c1b15f7..a05074ecc74 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -127,19 +127,6 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s } else { for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { - /* - l.input_layer->weights[i + 0] = 0; - l.input_layer->weights[i + 1] = 0; - l.input_layer->weights[i + 2] = 0; - - l.input_layer->weights[i + 3] = 0; - l.input_layer->weights[i + 4] = 1; - l.input_layer->weights[i + 5] = 0; - - l.input_layer->weights[i + 6] = 0; - l.input_layer->weights[i + 7] = 0; - l.input_layer->weights[i + 8] = 0; - */ l.input_layer->weights[i + 0] = 1 / 16.f; l.input_layer->weights[i + 1] = 2 / 16.f; l.input_layer->weights[i + 2] = 1 / 16.f; @@ -155,8 +142,10 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s } for (i = 0; i < l.out_c; ++i) l.input_layer->biases[i] = 0; #ifdef GPU - l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs); - push_convolutional_layer(*(l.input_layer)); + if (gpu_index >= 0) { + l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs); + push_convolutional_layer(*(l.input_layer)); + } #endif // GPU } diff --git a/src/parser.c b/src/parser.c index 2cf295f070e..edda54d1d92 100644 --- a/src/parser.c +++ b/src/parser.c @@ -951,6 +951,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) params.inputs = net.inputs; if (batch > 0) net.batch = batch; if (time_steps > 0) net.time_steps = time_steps; + if (net.batch < 1) net.batch = 1; + if (net.time_steps < 1) net.time_steps = 1; if (net.batch < net.time_steps) net.batch = net.time_steps; params.batch = net.batch; params.time_steps = net.time_steps; From d628e8eab7371f136cc05ae090b904eede7f9c55 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 5 Nov 2019 16:57:03 +0300 Subject: [PATCH 47/86] Fixed darknet.py for Uncertainty (gaussian_yolo_layer) --- build/darknet/x64/darknet.py | 3 ++- darknet.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/build/darknet/x64/darknet.py b/build/darknet/x64/darknet.py index 10c9a456ebd..5cfb26ba16e 100644 --- a/build/darknet/x64/darknet.py +++ b/build/darknet/x64/darknet.py @@ -59,7 +59,8 @@ class DETECTION(Structure): ("prob", POINTER(c_float)), ("mask", POINTER(c_float)), ("objectness", c_float), - ("sort_class", c_int)] + ("sort_class", c_int), + ("uc", POINTER(c_float))] class IMAGE(Structure): diff --git a/darknet.py b/darknet.py index 
10c9a456ebd..5cfb26ba16e 100644 --- a/darknet.py +++ b/darknet.py @@ -59,7 +59,8 @@ class DETECTION(Structure): ("prob", POINTER(c_float)), ("mask", POINTER(c_float)), ("objectness", c_float), - ("sort_class", c_int)] + ("sort_class", c_int), + ("uc", POINTER(c_float))] class IMAGE(Structure): From bf8ea4183dc265ac17f7c9d939dc815269f0a213 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 7 Nov 2019 01:15:58 +0300 Subject: [PATCH 48/86] Added MISH activation, use activation=mish in [convolutional] layers --- include/darknet.h | 6 +++--- src/activation_kernels.cu | 36 ++++++++++++++++++++++++++++++++++++ src/activations.c | 25 +++++++++++++++++++++++++ src/activations.h | 6 +++++- src/convolutional_kernels.cu | 9 ++++++--- src/convolutional_layer.c | 13 ++++++++----- src/layer.c | 4 ++-- 7 files changed, 85 insertions(+), 14 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index 00b49921f52..55f94ac5224 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -102,7 +102,7 @@ typedef struct tree { // activations.h typedef enum { - LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH }ACTIVATION; // parser.h @@ -347,7 +347,7 @@ struct layer { float *col_image; float * delta; float * output; - float * output_sigmoid; + float * activation_input; int delta_pinned; int output_pinned; float * loss; @@ -532,7 +532,7 @@ struct layer { float * input_antialiasing_gpu; float * output_gpu; - float * output_sigmoid_gpu; + float * activation_input_gpu; float * loss_gpu; float * delta_gpu; float * rand_gpu; diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 24563c69d6e..846c586fada 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -199,6 +199,16 @@ __global__ void activate_array_swish_kernel(float *x, int n, float *output_sigmo } } +__global__ void activate_array_mish_kernel(float *x, int n, float *activation_input, float *output_gpu) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i < n) { + float x_val = x[i]; + activation_input[i] = x_val; // store value before activation + output_gpu[i] = x_val * tanh_activate_kernel(log(1 + expf(x_val))); + } +} + __global__ void activate_array_leaky_kernel(float *x, int n) { int index = blockIdx.x*blockDim.x + threadIdx.x; @@ -263,6 +273,18 @@ __global__ void gradient_array_swish_kernel(float *x, int n, float *sigmoid_gpu, } } +__global__ void gradient_array_mish_kernel(int n, float *activation_input, float *delta) +{ + int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (i < n) { + float x = activation_input[i]; + float d = 2 * expf(x) + expf(2 * x) + 2; + float w = 4 * (x + 1) + 4 * expf(2 * x) + expf(3 * x) + expf(x)*(4 * x + 6); + float derivative = expf(x) * w / (d * d); + delta[i] *= derivative; + } +} + __global__ void gradient_array_leaky_kernel(float *x, int n, float *delta) { int index = blockIdx.x*blockDim.x + threadIdx.x; @@ -333,6 +355,13 @@ extern "C" void activate_array_swish_ongpu(float *x, int n, float *output_sigmoi CHECK_CUDA(cudaPeekAtLastError()); } +extern "C" void activate_array_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu) +{ + const int num_blocks = get_number_of_blocks(n, BLOCK); + activate_array_mish_kernel << > >(x, n, activation_input_gpu, output_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} + extern "C" void 
 extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
@@ -354,4 +383,11 @@ extern "C" void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu,
     const int num_blocks = get_number_of_blocks(n, BLOCK);
     gradient_array_swish_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, n, sigmoid_gpu, delta);
     CHECK_CUDA(cudaPeekAtLastError());
+}
+
+extern "C" void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta)
+{
+    const int num_blocks = get_number_of_blocks(n, BLOCK);
+    gradient_array_mish_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (n, activation_input_gpu, delta);
+    CHECK_CUDA(cudaPeekAtLastError());
 }
\ No newline at end of file
diff --git a/src/activations.c b/src/activations.c
index c3ea4818aeb..5311fb97a77 100644
--- a/src/activations.c
+++ b/src/activations.c
@@ -46,6 +46,7 @@ ACTIVATION get_activation(char *s)
 {
     if (strcmp(s, "logistic")==0) return LOGISTIC;
     if (strcmp(s, "swish") == 0) return SWISH;
+    if (strcmp(s, "mish") == 0) return MISH;
     if (strcmp(s, "loggy")==0) return LOGGY;
     if (strcmp(s, "relu")==0) return RELU;
     if (strcmp(s, "elu")==0) return ELU;
@@ -133,6 +134,17 @@ void activate_array_swish(float *x, const int n, float * output_sigmoid, float *
     }
 }
 
+void activate_array_mish(float *x, const int n, float * activation_input, float * output)
+{
+    int i;
+    #pragma omp parallel for
+    for (i = 0; i < n; ++i) {
+        float x_val = x[i];
+        activation_input[i] = x_val;    // store value before activation
+        output[i] = x_val * tanh_activate(log(1 + expf(x_val)));
+    }
+}
+
 float gradient(float x, ACTIVATION a)
 {
     switch(a){
@@ -187,3 +199,16 @@ void gradient_array_swish(const float *x, const int n, const float * sigmoid, fl
         delta[i] *= swish + sigmoid[i]*(1 - swish);
     }
 }
+
+void gradient_array_mish(const int n, const float * activation_input, float * delta)
+{
+    int i;
+    #pragma omp parallel for
+    for (i = 0; i < n; ++i) {
+        float x = activation_input[i];
+        float d = 2 * expf(x) + expf(2 * x) + 2;
+        float w = 4 * (x + 1) + 4 * expf(2 * x) + expf(3 * x) + expf(x)*(4 * x + 6);
+        float derivative = expf(x) * w / (d * d);
+        delta[i] *= derivative;
+    }
+}
diff --git a/src/activations.h b/src/activations.h
index 19f3822c8e3..bba5ca8d10a 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -5,7 +5,7 @@
 #include "math.h"
 
 //typedef enum{
-// LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
+// LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH
 //}ACTIVATION;
 
 #ifdef __cplusplus
@@ -18,13 +18,17 @@ float activate(float x, ACTIVATION a);
 float gradient(float x, ACTIVATION a);
 void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
 void gradient_array_swish(const float *x, const int n, const float * sigmoid, float * delta);
+void gradient_array_mish(const int n, const float * activation_input, float * delta);
 void activate_array(float *x, const int n, const ACTIVATION a);
 void activate_array_swish(float *x, const int n, float * output_sigmoid, float * output);
+void activate_array_mish(float *x, const int n, float * activation_input, float * output);
 
 #ifdef GPU
 void activate_array_ongpu(float *x, int n, ACTIVATION a);
 void activate_array_swish_ongpu(float *x, int n, float *output_sigmoid_gpu, float *output_gpu);
+void activate_array_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu);
 void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta);
 void gradient_array_swish_ongpu(float *x, int n, float 
*sigmoid_gpu, float *delta); +void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta); #endif static inline float stair_activate(float x) diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 0b94dd29db4..a73f277ee92 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -392,7 +392,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) */ //add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); - if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.output_sigmoid_gpu, l.output_gpu); + if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); + else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); else if (l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); //if(l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); //if (l.binary || l.xnor) swap_binary(&l); @@ -596,7 +597,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) //#ifndef CUDNN_HALF //#endif // no CUDNN_HALF - if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.output_sigmoid_gpu, l.output_gpu); + if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); + else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); else if (l.activation != LINEAR) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); //if(l.dot > 0) dot_error_gpu(l); if(l.binary || l.xnor) swap_binary(&l); @@ -639,7 +641,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state if(state.net.try_fix_nan) constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); - if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.output_sigmoid_gpu, l.delta_gpu); + if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); + else if (l.activation == MISH) gradient_array_mish_ongpu(l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); if (!l.batch_normalize) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index bf5beac7972..b76d7ee735f 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -473,10 +473,10 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.scale_v = (float*)calloc(n, sizeof(float)); } - if(l.activation == SWISH) l.output_sigmoid = (float*)calloc(total_batch*l.outputs, sizeof(float)); + if (l.activation == SWISH || l.activation == MISH) l.activation_input = (float*)calloc(total_batch*l.outputs, sizeof(float)); #ifdef GPU - if (l.activation == SWISH) l.output_sigmoid_gpu = cuda_make_array(l.output_sigmoid, total_batch*out_h*out_w*n); + if (l.activation == SWISH || l.activation == MISH) l.activation_input_gpu = cuda_make_array(l.activation_input, total_batch*out_h*out_w*n); l.forward_gpu = forward_convolutional_layer_gpu; l.backward_gpu = backward_convolutional_layer_gpu; @@ -1100,7 +1100,8 @@ void 
forward_convolutional_layer(convolutional_layer l, network_state state) add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w); //activate_array(l.output, m*n*l.batch, l.activation); - if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.output_sigmoid, l.output); + if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output); + else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output); else activate_array_cpu_custom(l.output, m*n*l.batch, l.activation); return; @@ -1139,7 +1140,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w); //activate_array(l.output, m*n*l.batch, l.activation); - if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.output_sigmoid, l.output); + if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output); + else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output); else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation); if(l.binary || l.xnor) swap_binary(&l); @@ -1276,7 +1278,8 @@ void backward_convolutional_layer(convolutional_layer l, network_state state) int n = l.size*l.size*l.c / l.groups; int k = l.out_w*l.out_h; - if (l.activation == SWISH) gradient_array_swish(l.output, l.outputs*l.batch, l.output_sigmoid, l.delta); + if (l.activation == SWISH) gradient_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.delta); + else if (l.activation == MISH) gradient_array_mish(l.outputs*l.batch, l.activation_input, l.delta); else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); if (l.batch_normalize) { diff --git a/src/layer.c b/src/layer.c index e9ae67b5ff5..9fe4a439364 100644 --- a/src/layer.c +++ b/src/layer.c @@ -90,7 +90,7 @@ void free_layer(layer l) #endif // GPU if (l.delta) free(l.delta), l.delta = NULL; if (l.output) free(l.output), l.output = NULL; - if (l.output_sigmoid) free(l.output_sigmoid), l.output_sigmoid = NULL; + if (l.activation_input) free(l.activation_input), l.activation_input = NULL; if (l.squared) free(l.squared); if (l.norms) free(l.norms); if (l.spatial_mean) free(l.spatial_mean); @@ -176,7 +176,7 @@ void free_layer(layer l) if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu), l.scale_updates_gpu = NULL; if (l.input_antialiasing_gpu) cuda_free(l.input_antialiasing_gpu), l.input_antialiasing_gpu = NULL; if (l.output_gpu) cuda_free(l.output_gpu), l.output_gpu = NULL; - if (l.output_sigmoid_gpu) cuda_free(l.output_sigmoid_gpu), l.output_sigmoid_gpu = NULL; + if (l.activation_input_gpu) cuda_free(l.activation_input_gpu), l.activation_input_gpu = NULL; if (l.delta_gpu) cuda_free(l.delta_gpu), l.delta_gpu = NULL; if (l.rand_gpu) cuda_free(l.rand_gpu); if (l.squared_gpu) cuda_free(l.squared_gpu); From c7c7078de7952fbdcfe7b4a7d60303e56e84846b Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 7 Nov 2019 01:18:47 +0300 Subject: [PATCH 49/86] Added URL to the description of activation MISH on GitHub --- src/activation_kernels.cu | 2 ++ src/activations.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 846c586fada..d8ff25f42e3 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -199,6 +199,7 @@ __global__ void activate_array_swish_kernel(float *x, int n, float *output_sigmo } } +// 
https://github.com/digantamisra98/Mish __global__ void activate_array_mish_kernel(float *x, int n, float *activation_input, float *output_gpu) { int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; @@ -273,6 +274,7 @@ __global__ void gradient_array_swish_kernel(float *x, int n, float *sigmoid_gpu, } } +// https://github.com/digantamisra98/Mish __global__ void gradient_array_mish_kernel(int n, float *activation_input, float *delta) { int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; diff --git a/src/activations.c b/src/activations.c index 5311fb97a77..da92af0af38 100644 --- a/src/activations.c +++ b/src/activations.c @@ -134,6 +134,7 @@ void activate_array_swish(float *x, const int n, float * output_sigmoid, float * } } +// https://github.com/digantamisra98/Mish void activate_array_mish(float *x, const int n, float * activation_input, float * output) { int i; @@ -200,6 +201,7 @@ void gradient_array_swish(const float *x, const int n, const float * sigmoid, fl } } +// https://github.com/digantamisra98/Mish void gradient_array_mish(const int n, const float * activation_input, float * delta) { int i; From 704bd1138e967bedf30ba4292369b3057aa5abeb Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 7 Nov 2019 23:49:10 +0300 Subject: [PATCH 50/86] Fixed params.net in parse_convolutional() and parse_route() --- src/parser.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/parser.c b/src/parser.c index edda54d1d92..52f3edceead 100644 --- a/src/parser.c +++ b/src/parser.c @@ -154,7 +154,7 @@ local_layer parse_local(list *options, size_params params) return layer; } -convolutional_layer parse_convolutional(list *options, size_params params, network net) +convolutional_layer parse_convolutional(list *options, size_params params) { int n = option_find_int(options, "filters",1); int groups = option_find_int_quiet(options, "groups", 1); @@ -185,8 +185,8 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo int share_index = option_find_int_quiet(options, "share_index", -1000000000); convolutional_layer *share_layer = NULL; - if(share_index >= 0) share_layer = &net.layers[share_index]; - else if(share_index != -1000000000) share_layer = &net.layers[params.index + share_index]; + if(share_index >= 0) share_layer = ¶ms.net.layers[share_index]; + else if(share_index != -1000000000) share_layer = ¶ms.net.layers[params.index + share_index]; int batch,h,w,c; h = params.h; @@ -754,7 +754,7 @@ layer parse_upsample(list *options, size_params params, network net) return l; } -route_layer parse_route(list *options, size_params params, network net) +route_layer parse_route(list *options, size_params params) { char *l = option_find(options, "layers"); int len = strlen(l); @@ -772,19 +772,19 @@ route_layer parse_route(list *options, size_params params, network net) l = strchr(l, ',')+1; if(index < 0) index = params.index + index; layers[i] = index; - sizes[i] = net.layers[index].outputs; + sizes[i] = params.net.layers[index].outputs; } int batch = params.batch; route_layer layer = make_route_layer(batch, n, layers, sizes); - convolutional_layer first = net.layers[layers[0]]; + convolutional_layer first = params.net.layers[layers[0]]; layer.out_w = first.out_w; layer.out_h = first.out_h; layer.out_c = first.out_c; for(i = 1; i < n; ++i){ int index = layers[i]; - convolutional_layer next = net.layers[index]; + convolutional_layer next = params.net.layers[index]; if(next.out_w == first.out_w && next.out_h == 
first.out_h){
             layer.out_c += next.out_c;
         }else{
@@ -974,7 +974,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         layer l = { (LAYER_TYPE)0 };
         LAYER_TYPE lt = string_to_layer_type(s->type);
         if(lt == CONVOLUTIONAL){
-            l = parse_convolutional(options, params, net);
+            l = parse_convolutional(options, params);
         }else if(lt == LOCAL){
             l = parse_local(options, params);
         }else if(lt == ACTIVE){
@@ -1019,7 +1019,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
         }else if(lt == AVGPOOL){
             l = parse_avgpool(options, params);
         }else if(lt == ROUTE){
-            l = parse_route(options, params, net);
+            l = parse_route(options, params);
             int k;
             for (k = 0; k < l.n; ++k) net.layers[l.input_layers[k]].use_bin_output = 0;
         }else if (lt == UPSAMPLE) {

From 0fa9c8f10588cd5db54b742bc2bcbefcddd87000 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Fri, 8 Nov 2019 01:06:49 +0300
Subject: [PATCH 51/86] Added groups= and group_id= params to the [route] layer

---
 include/darknet.h |  1 +
 src/parser.c      |  6 +++++-
 src/route_layer.c | 35 ++++++++++++++++++++++++++---------
 src/route_layer.h |  2 +-
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index 55f94ac5224..f29cb15689a 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -206,6 +206,7 @@ struct layer {
     int n;
     int max_boxes;
     int groups;
+    int group_id;
     int size;
     int side;
     int stride;
diff --git a/src/parser.c b/src/parser.c
index 52f3edceead..e0b65f3bf3b 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -776,7 +776,10 @@ route_layer parse_route(list *options, size_params params)
     }
     int batch = params.batch;
-    route_layer layer = make_route_layer(batch, n, layers, sizes);
+    int groups = option_find_int_quiet(options, "groups", 1);
+    int group_id = option_find_int_quiet(options, "group_id", 0);
+
+    route_layer layer = make_route_layer(batch, n, layers, sizes, groups, group_id);
 
     convolutional_layer first = params.net.layers[layers[0]];
     layer.out_w = first.out_w;
@@ -791,6 +794,7 @@ route_layer parse_route(list *options, size_params params)
             layer.out_h = layer.out_w = layer.out_c = 0;
         }
     }
+    layer.out_c = layer.out_c / layer.groups;
     return layer;
 }
diff --git a/src/route_layer.c b/src/route_layer.c
index b502fbe72b0..b636d4824e8 100644
--- a/src/route_layer.c
+++ b/src/route_layer.c
@@ -3,7 +3,7 @@
 #include "blas.h"
 #include <stdio.h>
 
-route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes)
+route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes, int groups, int group_id)
 {
     fprintf(stderr,"route ");
     route_layer l = { (LAYER_TYPE)0 };
@@ -12,6 +12,8 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz
     l.n = n;
     l.input_layers = input_layers;
     l.input_sizes = input_sizes;
+    l.groups = groups;
+    l.group_id = group_id;
     int i;
     int outputs = 0;
     for(i = 0; i < n; ++i){
@@ -19,6 +21,7 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz
         outputs += input_sizes[i];
     }
     fprintf(stderr, "\n");
+    outputs = outputs / groups;
     l.outputs = outputs;
     l.inputs = outputs;
     l.delta = (float*)calloc(outputs * batch, sizeof(float));
     l.output = (float*)calloc(outputs * batch, sizeof(float));
@@ -57,6 +60,8 @@ void resize_route_layer(route_layer *l, network *net)
             l->out_h = l->out_w = l->out_c = 0;
         }
     }
+    l->out_c = l->out_c / l->groups;
+    l->outputs = l->outputs / l->groups;
     l->inputs = l->outputs;
     l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float));
     l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float));
@@ 
-78,10 +83,13 @@ void forward_route_layer(const route_layer l, network_state state) int index = l.input_layers[i]; float *input = state.net.layers[index].output; int input_size = l.input_sizes[i]; + int part_input_size = input_size / l.groups; for(j = 0; j < l.batch; ++j){ - copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); + //copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); + copy_cpu(part_input_size, input + j*input_size + part_input_size*l.group_id, 1, l.output + offset + j*l.outputs, 1); } - offset += input_size; + //offset += input_size; + offset += part_input_size; } } @@ -93,10 +101,13 @@ void backward_route_layer(const route_layer l, network_state state) int index = l.input_layers[i]; float *delta = state.net.layers[index].delta; int input_size = l.input_sizes[i]; + int part_input_size = input_size / l.groups; for(j = 0; j < l.batch; ++j){ - axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); + //axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); + axpy_cpu(part_input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size + part_input_size*l.group_id, 1); } - offset += input_size; + //offset += input_size; + offset += part_input_size; } } @@ -109,11 +120,14 @@ void forward_route_layer_gpu(const route_layer l, network_state state) int index = l.input_layers[i]; float *input = state.net.layers[index].output_gpu; int input_size = l.input_sizes[i]; + int part_input_size = input_size / l.groups; for(j = 0; j < l.batch; ++j){ //copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); - simple_copy_ongpu(input_size, input + j*input_size, l.output_gpu + offset + j*l.outputs); + //simple_copy_ongpu(input_size, input + j*input_size, l.output_gpu + offset + j*l.outputs); + simple_copy_ongpu(part_input_size, input + j*input_size + part_input_size*l.group_id, l.output_gpu + offset + j*l.outputs); } - offset += input_size; + //offset += input_size; + offset += part_input_size; } } @@ -125,10 +139,13 @@ void backward_route_layer_gpu(const route_layer l, network_state state) int index = l.input_layers[i]; float *delta = state.net.layers[index].delta_gpu; int input_size = l.input_sizes[i]; + int part_input_size = input_size / l.groups; for(j = 0; j < l.batch; ++j){ - axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); + //axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); + axpy_ongpu(part_input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size + part_input_size*l.group_id, 1); } - offset += input_size; + //offset += input_size; + offset += part_input_size; } } #endif diff --git a/src/route_layer.h b/src/route_layer.h index 3ee9019385f..2ebe396039c 100644 --- a/src/route_layer.h +++ b/src/route_layer.h @@ -8,7 +8,7 @@ typedef layer route_layer; #ifdef __cplusplus extern "C" { #endif -route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); +route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size, int groups, int group_id); void forward_route_layer(const route_layer l, network_state state); void backward_route_layer(const route_layer l, network_state state); void resize_route_layer(route_layer *l, network *net); From 0eee8404bf271256f3455491fe3c55830f14f31d Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 9 Nov 2019 14:03:29 +0300 Subject: [PATCH 52/86] Update Readme.md --- 
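Before the README tweaks below, a usage sketch for the `groups=`/`group_id=` route parameters added in the previous patch: with `groups=2` and `group_id=1` the layer forwards only the second half of each source layer's channels, so `l.outputs` becomes `1/groups` of the concatenated size (the kind of channel split used by CSP-style networks). `make_route_layer` is the real constructor from the patch; the wrapper and its inputs are assumptions:

```c
#include <stdlib.h>
#include "route_layer.h"

/* Route only the second half of the channels of one source layer.
   Equivalent cfg: [route] with layers=<prev>, groups=2, group_id=1. */
route_layer half_channel_route(int batch, int prev_index, int prev_outputs)
{
    int *layers = (int*)calloc(1, sizeof(int));
    int *sizes  = (int*)calloc(1, sizeof(int));
    layers[0] = prev_index;    /* which layer to read from */
    sizes[0]  = prev_outputs;  /* its full output size; the layer keeps 1/groups of it */
    return make_route_layer(batch, 1, layers, sizes, 2, 1);
}
```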
README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 9042a6f99e7..5c0efdeff1a 100644
--- a/README.md
+++ b/README.md
@@ -607,6 +607,8 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol
   * each: `model of object, side, illumination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used.
 
+  * to make the detected bounding boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5.
+
   * Only if you are an **expert** in neural detection networks - recalculate anchors for your dataset for `width` and `height` from cfg-file: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file. But you should change indexes of anchors `masks=` for each [yolo]-layer, so that 1st-[yolo]-layer has anchors larger than 60x60, 2nd larger than 30x30, 3rd remaining. Also you should change the `filters=(classes + 5)*` before each [yolo]-layer. If many of the calculated anchors do not fit under the appropriate layers - then just try using all the default anchors.

From d91d59a22fea9c266f06c2ae5edb23d38fd83c20 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sat, 9 Nov 2019 22:48:18 +0300
Subject: [PATCH 53/86] Optimized memory allocation for Detection (inference
 only), without allocating memory for training

---
 include/darknet.h            |   1 +
 src/conv_lstm_layer.c        |  25 +++----
 src/conv_lstm_layer.h        |   2 +-
 src/convolutional_kernels.cu |  10 ++-
 src/convolutional_layer.c    | 122 ++++++++++++++++++++++++-----------
 src/convolutional_layer.h    |   3 +-
 src/crnn_layer.c             |   9 +--
 src/crnn_layer.h             |   2 +-
 src/gemm.c                   |   4 +-
 src/maxpool_layer.c          |  44 ++++++++-----
 src/maxpool_layer.h          |   2 +-
 src/maxpool_layer_kernels.cu |   4 +-
 src/network.c                |   1 +
 src/parser.c                 |  14 ++--
 src/shortcut_layer.c         |  16 +++--
 src/shortcut_layer.h         |   2 +-
 src/yolo_layer.c             |   2 +-
 17 files changed, 169 insertions(+), 94 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index f29cb15689a..8be704061b1 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -190,6 +190,7 @@ struct layer {
     void(*backward_gpu) (struct layer, struct network_state);
     void(*update_gpu) (struct layer, int, float, float, float);
     layer *share_layer;
+    int train;
     int batch_normalize;
     int shortcut;
     int batch;
diff --git a/src/conv_lstm_layer.c b/src/conv_lstm_layer.c
index 4ae67b44a83..b4059ed1ec8 100644
--- a/src/conv_lstm_layer.c
+++ b/src/conv_lstm_layer.c
@@ -32,7 +32,7 @@ static void increment_layer(layer *l, int steps)
 }
 
-layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int peephole, int xnor)
+layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int peephole, int xnor, int train)
 {
     fprintf(stderr, "CONV_LSTM Layer: %d x %d x %d image, %d filters\n", h, w, c, output_filters);
     /*
 */
     batch = batch / steps;
     layer l = { (LAYER_TYPE)0 };
+    l.train 
= train; l.batch = batch; l.type = CONV_LSTM; l.steps = steps; @@ -66,44 +67,44 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // U l.uf = (layer*)calloc(1, sizeof(layer)); - *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.uf->batch = batch; if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; l.ui = (layer*)calloc(1, sizeof(layer)); - *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.ui->batch = batch; if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; l.ug = (layer*)calloc(1, sizeof(layer)); - *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.ug->batch = batch; if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size; l.uo = (layer*)calloc(1, sizeof(layer)); - *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.uo->batch = batch; if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size; // W l.wf = (layer*)calloc(1, sizeof(layer)); - *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.wf->batch = batch; if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; l.wi = (layer*)calloc(1, sizeof(layer)); - *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.wi->batch = batch; if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; l.wg = (layer*)calloc(1, sizeof(layer)); - *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, 
NULL, 0); + *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.wg->batch = batch; if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; l.wo = (layer*)calloc(1, sizeof(layer)); - *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.wo->batch = batch; if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size; @@ -111,21 +112,21 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i // V l.vf = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.vf->batch = batch; if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size; } l.vi = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.vi->batch = batch; if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size; } l.vo = (layer*)calloc(1, sizeof(layer)); if (l.peephole) { - *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.vo->batch = batch; if (l.workspace_size < l.vo->workspace_size) l.workspace_size = l.vo->workspace_size; } diff --git a/src/conv_lstm_layer.h b/src/conv_lstm_layer.h index 17e4fdc3a66..0a0438b04f5 100644 --- a/src/conv_lstm_layer.h +++ b/src/conv_lstm_layer.h @@ -9,7 +9,7 @@ #ifdef __cplusplus extern "C" { #endif -layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int peephole, int xnor); +layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int peephole, int xnor, int train); void resize_conv_lstm_layer(layer *l, int w, int h); void free_state_conv_lstm(layer l); void randomize_state_conv_lstm(layer l); diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index a73f277ee92..1a6b5f8affb 100644 
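All of the constructors above now receive the `train` flag that `parse_network_cfg_custom()` derives from its `batch` argument (see the parser.c hunk later in this patch): `batch > 0` means inference-only, so `delta`, `x`/`x_norm`, weight-update buffers and maxpool `indexes` are never allocated. A sketch of an inference-only load under that behavior; the cfg/weights paths are placeholders and the declarations are assumed to come from this fork's `darknet.h`:

```c
#include "darknet.h"

int main(void)
{
    /* batch=1, time_steps=0  ->  params.train = 0 inside the parser,
       so only Detection (forward-pass) buffers are allocated. */
    network net = parse_network_cfg_custom("cfg/yolov3.cfg", 1, 0);
    load_weights(&net, "yolov3.weights");
    fuse_conv_batchnorm(net);  /* with this patch, also releases the now-unused
                                  batch-norm buffers via free_convolutional_batchnorm() */
    /* ... run network_predict()/get_network_boxes() here ... */
    free_network(net);
    return 0;
}
```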
--- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -986,7 +986,8 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) } else { if (iteration_num < state.net.burn_in) return; - else if (iteration_num > l.assisted_excitation) return; + else + if (iteration_num > l.assisted_excitation) return; else alpha = (1 + cos(3.141592 * iteration_num / (state.net.burn_in + l.assisted_excitation))) / 2; // from 1 to 0 } @@ -1018,6 +1019,7 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) for (t = 0; t < state.net.num_boxes; ++t) { box truth = float_to_box_stride(truth_cpu + t*(4 + 1) + b*l.truths, 1); if (!truth.x) break; // continue; + //float beta = 0; float beta = 1 - alpha; // from 0 to 1 float dw = (1 - truth.w) * beta; float dh = (1 - truth.h) * beta; @@ -1162,8 +1164,10 @@ void push_convolutional_layer(convolutional_layer l) cuda_convert_f32_to_f16(l.weights_gpu, l.nweights, l.weights_gpu16); #endif cuda_push_array(l.biases_gpu, l.biases, l.n); - cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); - cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + if (l.train) { + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); + cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); + } if (l.batch_normalize){ cuda_push_array(l.scales_gpu, l.scales, l.n); cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index b76d7ee735f..7f2bac9690b 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -123,7 +123,7 @@ size_t get_workspace_size32(layer l){ l.dweightDesc, l.bf_algo, &s)); - if (s > most) most = s; + if (s > most && l.train) most = s; CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), l.weightDesc, l.ddstTensorDesc, @@ -131,7 +131,7 @@ size_t get_workspace_size32(layer l){ l.dsrcTensorDesc, l.bd_algo, &s)); - if (s > most) most = s; + if (s > most && l.train) most = s; return most; } #endif @@ -164,7 +164,7 @@ size_t get_workspace_size16(layer l) { l.dweightDesc16, l.bf_algo16, &s)); - if (s > most) most = s; + if (s > most && l.train) most = s; CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), l.weightDesc16, l.ddstTensorDesc16, @@ -172,7 +172,7 @@ size_t get_workspace_size16(layer l) { l.dsrcTensorDesc16, l.bd_algo16, &s)); - if (s > most) most = s; + if (s > most && l.train) most = s; return most; } #endif @@ -333,12 +333,43 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) #endif #endif -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer, int assisted_excitation) + +void free_convolutional_batchnorm(convolutional_layer *l) +{ + if (!l->share_layer) { + free(l->scales); + free(l->scale_updates); + free(l->mean); + free(l->variance); + free(l->mean_delta); + free(l->variance_delta); + free(l->rolling_mean); + free(l->rolling_variance); + free(l->x); + free(l->x_norm); + +#ifdef GPU + cuda_free(l->scales_gpu); + cuda_free(l->scale_updates_gpu); + cuda_free(l->mean_gpu); + cuda_free(l->variance_gpu); + cuda_free(l->mean_delta_gpu); + cuda_free(l->variance_delta_gpu); + cuda_free(l->rolling_mean_gpu); + cuda_free(l->rolling_variance_gpu); + 
cuda_free(l->x_gpu); + cuda_free(l->x_norm_gpu); +#endif + } +} + +convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer, int assisted_excitation, int train) { int total_batch = batch*steps; int i; convolutional_layer l = { (LAYER_TYPE)0 }; l.type = CONVOLUTIONAL; + l.train = train; if (xnor) groups = 1; // disable groups for XNOR-net if (groups < 1) groups = 1; @@ -382,10 +413,12 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, } else { l.weights = (float*)calloc(l.nweights, sizeof(float)); - l.weight_updates = (float*)calloc(l.nweights, sizeof(float)); - l.biases = (float*)calloc(n, sizeof(float)); - l.bias_updates = (float*)calloc(n, sizeof(float)); + + if (train) { + l.weight_updates = (float*)calloc(l.nweights, sizeof(float)); + l.bias_updates = (float*)calloc(n, sizeof(float)); + } } // float scale = 1./sqrt(size*size*c); @@ -401,7 +434,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.activation = activation; l.output = (float*)calloc(total_batch*l.outputs, sizeof(float)); - l.delta = (float*)calloc(total_batch*l.outputs, sizeof(float)); + if (train) l.delta = (float*)calloc(total_batch*l.outputs, sizeof(float)); l.forward = forward_convolutional_layer; l.backward = backward_convolutional_layer; @@ -445,23 +478,27 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, } else { l.scales = (float*)calloc(n, sizeof(float)); - l.scale_updates = (float*)calloc(n, sizeof(float)); for (i = 0; i < n; ++i) { l.scales[i] = 1; } + if (train) { + l.scale_updates = (float*)calloc(n, sizeof(float)); - l.mean = (float*)calloc(n, sizeof(float)); - l.variance = (float*)calloc(n, sizeof(float)); + l.mean = (float*)calloc(n, sizeof(float)); + l.variance = (float*)calloc(n, sizeof(float)); - l.mean_delta = (float*)calloc(n, sizeof(float)); - l.variance_delta = (float*)calloc(n, sizeof(float)); + l.mean_delta = (float*)calloc(n, sizeof(float)); + l.variance_delta = (float*)calloc(n, sizeof(float)); + } l.rolling_mean = (float*)calloc(n, sizeof(float)); l.rolling_variance = (float*)calloc(n, sizeof(float)); } - l.x = (float*)calloc(total_batch * l.outputs, sizeof(float)); - l.x_norm = (float*)calloc(total_batch * l.outputs, sizeof(float)); + if (train) { + l.x = (float*)calloc(total_batch * l.outputs, sizeof(float)); + l.x_norm = (float*)calloc(total_batch * l.outputs, sizeof(float)); + } } if(adam){ l.adam = 1; @@ -501,17 +538,17 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, } else { l.weights_gpu = cuda_make_array(l.weights, l.nweights); - l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + if (train) l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); #ifdef CUDNN_HALF l.weights_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); - l.weight_updates_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); + if (train) l.weight_updates_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); #endif // CUDNN_HALF l.biases_gpu = cuda_make_array(l.biases, n); - l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); + if (train) l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); } l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); - l.delta_gpu = 
cuda_make_array(l.delta, total_batch*out_h*out_w*n); + if (train) l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n); if(binary){ l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); @@ -535,19 +572,25 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, } else { l.scales_gpu = cuda_make_array(l.scales, n); - l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); - l.mean_gpu = cuda_make_array(l.mean, n); - l.variance_gpu = cuda_make_array(l.variance, n); + if (train) { + l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); + + l.mean_gpu = cuda_make_array(l.mean, n); + l.variance_gpu = cuda_make_array(l.variance, n); + + l.mean_delta_gpu = cuda_make_array(l.mean, n); + l.variance_delta_gpu = cuda_make_array(l.variance, n); + } l.rolling_mean_gpu = cuda_make_array(l.mean, n); l.rolling_variance_gpu = cuda_make_array(l.variance, n); + } - l.mean_delta_gpu = cuda_make_array(l.mean, n); - l.variance_delta_gpu = cuda_make_array(l.variance, n); + if (train) { + l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); + l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); } - l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); - l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); } if (l.assisted_excitation) @@ -594,7 +637,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, blur_size = 2; blur_pad = 0; } - *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_pad, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL, 0); + *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_pad, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL, 0, train); const int blur_nweights = n * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; int i; if (blur_size == 2) { @@ -649,7 +692,7 @@ void denormalize_convolutional_layer(convolutional_layer l) void test_convolutional_layer() { - convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, 0, NULL, 0); + convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, 0, NULL, 0, 0); l.batch_normalize = 1; float data[] = {1,1,1,1,1, 1,1,1,1,1, @@ -688,10 +731,13 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) l->inputs = l->w * l->h * l->c; l->output = (float*)realloc(l->output, total_batch * l->outputs * sizeof(float)); - l->delta = (float*)realloc(l->delta, total_batch * l->outputs * sizeof(float)); - if(l->batch_normalize){ - l->x = (float*)realloc(l->x, total_batch * l->outputs * sizeof(float)); - l->x_norm = (float*)realloc(l->x_norm, total_batch * l->outputs * sizeof(float)); + if (l->train) { + l->delta = (float*)realloc(l->delta, total_batch * l->outputs * sizeof(float)); + + if (l->batch_normalize) { + l->x = (float*)realloc(l->x, total_batch * l->outputs * sizeof(float)); + l->x_norm = (float*)realloc(l->x_norm, total_batch * l->outputs * sizeof(float)); + } } if (l->xnor) { @@ -700,10 +746,12 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) #ifdef GPU if (old_w < w || old_h < h) { - cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); + if (l->train) { + cuda_free(l->delta_gpu); + l->delta_gpu = cuda_make_array(l->delta, total_batch*l->outputs); + } - l->delta_gpu = cuda_make_array(l->delta, total_batch*l->outputs); + 
cuda_free(l->output_gpu); l->output_gpu = cuda_make_array(l->output, total_batch*l->outputs); if (l->batch_normalize) { @@ -1246,7 +1294,7 @@ void assisted_excitation_forward(convolutional_layer l, network_state state) } } - if(1) // visualize ground truth + if(0) // visualize ground truth { #ifdef OPENCV for (b = 0; b < l.batch; ++b) diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index 0072ce549c3..817f900b783 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -28,9 +28,10 @@ void create_convolutional_cudnn_tensors(layer *l); void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); #endif #endif +void free_convolutional_batchnorm(convolutional_layer *l); size_t get_convolutional_workspace_size(layer l); -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer, int assisted_excitation); +convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer, int assisted_excitation, int train); void denormalize_convolutional_layer(convolutional_layer l); void resize_convolutional_layer(convolutional_layer *layer, int w, int h); void forward_convolutional_layer(const convolutional_layer layer, network_state state); diff --git a/src/crnn_layer.c b/src/crnn_layer.c index 588db7411a0..cbeaa5087bc 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -26,11 +26,12 @@ static void increment_layer(layer *l, int steps) #endif } -layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int xnor) +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int xnor, int train) { fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); batch = batch / steps; layer l = { (LAYER_TYPE)0 }; + l.train = train; l.batch = batch; l.type = CRNN; l.steps = steps; @@ -50,17 +51,17 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou l.state = (float*)calloc(l.hidden * l.batch * (l.steps + 1), sizeof(float)); l.input_layer = (layer*)calloc(1, sizeof(layer)); - *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.input_layer->batch = batch; if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; l.self_layer = (layer*)calloc(1, sizeof(layer)); - *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, 
batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.self_layer->batch = batch; if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size; l.output_layer = (layer*)calloc(1, sizeof(layer)); - *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0); + *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, train); l.output_layer->batch = batch; if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; diff --git a/src/crnn_layer.h b/src/crnn_layer.h index 33560aae462..c2dc7a25258 100644 --- a/src/crnn_layer.h +++ b/src/crnn_layer.h @@ -9,7 +9,7 @@ #ifdef __cplusplus extern "C" { #endif -layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int xnor); +layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int xnor, int train); void resize_crnn_layer(layer *l, int w, int h); void free_state_crnn(layer l); diff --git a/src/gemm.c b/src/gemm.c index 151388284b1..9f5cb882c61 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -1949,7 +1949,7 @@ void forward_maxpool_layer_avx(float *src, float *dst, int *indexes, int size, i } } dst[out_index] = max; - indexes[out_index] = max_i; + if (indexes) indexes[out_index] = max_i; } } } @@ -2452,7 +2452,7 @@ void forward_maxpool_layer_avx(float *src, float *dst, int *indexes, int size, i } } dst[out_index] = max; - indexes[out_index] = max_i; + if (indexes) indexes[out_index] = max_i; } } } diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index a05074ecc74..54aa9c0ed91 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -46,10 +46,11 @@ void cudnn_maxpool_setup(layer *l) } -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels, int antialiasing) +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels, int antialiasing, int train) { maxpool_layer l = { (LAYER_TYPE)0 }; l.type = MAXPOOL; + l.train = train; const int blur_stride_x = stride_x; const int blur_stride_y = stride_y; @@ -82,21 +83,25 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s l.stride_x = stride_x; l.stride_y = stride_y; int output_size = l.out_h * l.out_w * l.out_c * batch; - l.indexes = (int*)calloc(output_size, sizeof(int)); + + if (train) { + l.indexes = (int*)calloc(output_size, sizeof(int)); + l.delta = (float*)calloc(output_size, sizeof(float)); + } l.output = (float*)calloc(output_size, sizeof(float)); - l.delta = (float*)calloc(output_size, sizeof(float)); l.forward = forward_maxpool_layer; l.backward = backward_maxpool_layer; - #ifdef GPU +#ifdef GPU l.forward_gpu = 
forward_maxpool_layer_gpu; l.backward_gpu = backward_maxpool_layer_gpu; - l.indexes_gpu = cuda_make_int_array(output_size); + if (train) { + l.indexes_gpu = cuda_make_int_array(output_size); + l.delta_gpu = cuda_make_array(l.delta, output_size); + } l.output_gpu = cuda_make_array(l.output, output_size); - l.delta_gpu = cuda_make_array(l.delta, output_size); - cudnn_maxpool_setup(&l); - #endif // GPU +#endif // GPU l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; if (maxpool_depth) fprintf(stderr, "max-depth %2dx%2d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); @@ -114,7 +119,7 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s blur_size = 2; blur_pad = 0; } - *(l.input_layer) = make_convolutional_layer(batch, 1, l.out_h, l.out_w, l.out_c, l.out_c, l.out_c, blur_size, blur_stride_x, blur_stride_y, 1, blur_pad, LINEAR, 0, 0, 0, 0, 0, 1, 0, NULL, 0); + *(l.input_layer) = make_convolutional_layer(batch, 1, l.out_h, l.out_w, l.out_c, l.out_c, l.out_c, blur_size, blur_stride_x, blur_stride_y, 1, blur_pad, LINEAR, 0, 0, 0, 0, 0, 1, 0, NULL, 0, train); const int blur_nweights = l.out_c * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; int i; if (blur_size == 2) { @@ -163,17 +168,22 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h) l->outputs = l->out_w * l->out_h * l->out_c; int output_size = l->outputs * l->batch; - l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int)); + if (l->train) { + l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int)); + l->delta = (float*)realloc(l->delta, output_size * sizeof(float)); + } l->output = (float*)realloc(l->output, output_size * sizeof(float)); - l->delta = (float*)realloc(l->delta, output_size * sizeof(float)); #ifdef GPU - CHECK_CUDA(cudaFree((float *)l->indexes_gpu)); CHECK_CUDA(cudaFree(l->output_gpu)); - CHECK_CUDA(cudaFree(l->delta_gpu)); - l->indexes_gpu = cuda_make_int_array(output_size); l->output_gpu = cuda_make_array(l->output, output_size); - l->delta_gpu = cuda_make_array(l->delta, output_size); + + if (l->train) { + CHECK_CUDA(cudaFree((float *)l->indexes_gpu)); + CHECK_CUDA(cudaFree(l->delta_gpu)); + l->indexes_gpu = cuda_make_int_array(output_size); + l->delta_gpu = cuda_make_array(l->delta, output_size); + } cudnn_maxpool_setup(l); #endif @@ -203,7 +213,7 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state) max = (val > max) ? 
val : max; } l.output[out_index] = max; - l.indexes[out_index] = max_i; + if (l.indexes) l.indexes[out_index] = max_i; } } } @@ -245,7 +255,7 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state) } } l.output[out_index] = max; - l.indexes[out_index] = max_i; + if (l.indexes) l.indexes[out_index] = max_i; } } } diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index cfedf9d9ee6..3986a4c6360 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -12,7 +12,7 @@ typedef layer maxpool_layer; extern "C" { #endif image get_maxpool_image(maxpool_layer l); -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels, int antialiasing); +maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels, int antialiasing, int train); void resize_maxpool_layer(maxpool_layer *l, int w, int h); void forward_maxpool_layer(const maxpool_layer l, network_state state); void backward_maxpool_layer(const maxpool_layer l, network_state state); diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index 8677b4d713d..c15143c556b 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -36,7 +36,7 @@ __global__ void forward_maxpool_depth_layer_kernel(int n, int w, int h, int c, i max = (val > max) ? val : max; } output[out_index] = max; - indexes[out_index] = max_i; + if (indexes) indexes[out_index] = max_i; } } @@ -88,7 +88,7 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c } } output[out_index] = max; - indexes[out_index] = max_i; + if (indexes) indexes[out_index] = max_i; } __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *delta, float *prev_delta, int *indexes) diff --git a/src/network.c b/src/network.c index cfe994343e8..c2249a54df6 100644 --- a/src/network.c +++ b/src/network.c @@ -1071,6 +1071,7 @@ void fuse_conv_batchnorm(network net) } } + free_convolutional_batchnorm(l); l->batch_normalize = 0; #ifdef GPU if (gpu_index >= 0) { diff --git a/src/parser.c b/src/parser.c index e0b65f3bf3b..1e5ad78c781 100644 --- a/src/parser.c +++ b/src/parser.c @@ -130,6 +130,7 @@ typedef struct size_params{ int c; int index; int time_steps; + int train; network net; } size_params; @@ -199,7 +200,7 @@ convolutional_layer parse_convolutional(list *options, size_params params) int xnor = option_find_int_quiet(options, "xnor", 0); int use_bin_output = option_find_int_quiet(options, "bin_output", 0); - convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer, assisted_excitation); + convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer, assisted_excitation, params.train); layer.flipped = option_find_int_quiet(options, "flipped", 0); layer.dot = option_find_float_quiet(options, "dot", 0); @@ -230,7 +231,7 @@ layer parse_crnn(list *options, size_params params) int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); int xnor = option_find_int_quiet(options, "xnor", 0); - layer l = 
make_crnn_layer(params.batch, params.h, params.w, params.c, hidden_filters, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, xnor); + layer l = make_crnn_layer(params.batch, params.h, params.w, params.c, hidden_filters, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, xnor, params.train); l.shortcut = option_find_int_quiet(options, "shortcut", 0); @@ -291,7 +292,7 @@ layer parse_conv_lstm(list *options, size_params params) int xnor = option_find_int_quiet(options, "xnor", 0); int peephole = option_find_int_quiet(options, "peephole", 0); - layer l = make_conv_lstm_layer(params.batch, params.h, params.w, params.c, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, peephole, xnor); + layer l = make_conv_lstm_layer(params.batch, params.h, params.w, params.c, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, peephole, xnor, params.train); l.state_constrain = option_find_int_quiet(options, "state_constrain", params.time_steps * 32); l.shortcut = option_find_int_quiet(options, "shortcut", 0); @@ -630,7 +631,7 @@ maxpool_layer parse_maxpool(list *options, size_params params) batch=params.batch; if(!(h && w && c)) error("Layer before maxpool layer must output image."); - maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing); + maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing, params.train); return layer; } @@ -684,7 +685,7 @@ layer parse_shortcut(list *options, size_params params, network net) layer from = net.layers[index]; if (from.antialiasing) from = *from.input_layer; - layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, assisted_excitation); + layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, assisted_excitation, params.train); char *activation_s = option_find_str(options, "activation", "linear"); ACTIVATION activation = get_activation(activation_s); @@ -944,6 +945,9 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) net.gpu_index = gpu_index; size_params params; + if (batch > 0) params.train = 0; // allocates memory for Detection only + else params.train = 1; // allocates memory for Detection & Training + section *s = (section *)n->val; list *options = s->options; if(!is_network(s)) error("First section must be [net] or [network]"); diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index 8345858e47d..615a5ea384b 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -5,11 +5,12 @@ #include #include -layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation) +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation, int train) { if(assisted_excitation) fprintf(stderr, "Shortcut Layer - AE: %d\n", index); else fprintf(stderr,"Shortcut Layer: %d\n", index); layer l = { (LAYER_TYPE)0 }; + l.train = train; l.type = SHORTCUT; l.batch = batch; l.w = w2; @@ -27,7 +28,7 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.index = index; - l.delta = (float*)calloc(l.outputs * batch, sizeof(float)); + if (train) 
l.delta = (float*)calloc(l.outputs * batch, sizeof(float)); l.output = (float*)calloc(l.outputs * batch, sizeof(float)); l.forward = forward_shortcut_layer; @@ -36,7 +37,7 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.forward_gpu = forward_shortcut_layer_gpu; l.backward_gpu = backward_shortcut_layer_gpu; - l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + if (train) l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); l.output_gpu = cuda_make_array(l.output, l.outputs*batch); if (l.assisted_excitation) { @@ -56,14 +57,17 @@ void resize_shortcut_layer(layer *l, int w, int h) l->h = l->out_h = h; l->outputs = w*h*l->out_c; l->inputs = l->outputs; - l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float)); + if (l->train) l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float)); l->output = (float*)realloc(l->output, l->outputs * l->batch * sizeof(float)); #ifdef GPU cuda_free(l->output_gpu); - cuda_free(l->delta_gpu); l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); - l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); + + if (l->train) { + cuda_free(l->delta_gpu); + l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); + } #endif } diff --git a/src/shortcut_layer.h b/src/shortcut_layer.h index ad8d45f3e28..89f22ceb3be 100644 --- a/src/shortcut_layer.h +++ b/src/shortcut_layer.h @@ -7,7 +7,7 @@ #ifdef __cplusplus extern "C" { #endif -layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation); +layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation, int train); void forward_shortcut_layer(const layer l, network_state state); void backward_shortcut_layer(const layer l, network_state state); void resize_shortcut_layer(layer *l, int w, int h); diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 906ed427f01..06d2b513624 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -282,7 +282,6 @@ void forward_yolo_layer(const layer l, network_state state) box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h); float best_iou = 0; int best_t = 0; - int class_id_match = 0; for (t = 0; t < l.max_boxes; ++t) { box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; @@ -298,6 +297,7 @@ void forward_yolo_layer(const layer l, network_state state) int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); float objectness = l.output[obj_index]; int pred_class_id = get_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness); + int class_id_match = 0; if (class_id == pred_class_id) class_id_match = 1; else class_id_match = 0; From 20659fc1a8ec858d24ad2b70f55add810dc5caff Mon Sep 17 00:00:00 2001 From: 7FM <41307817+7FM@users.noreply.github.com> Date: Mon, 11 Nov 2019 22:08:06 +0100 Subject: [PATCH 54/86] Fix undefined behavior caused by delete Avoid multiple deletes of the same address and memory leaks --- src/image_opencv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index 912a0b1b6d2..6951fb9a80e 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -734,7 +734,7 @@ int wait_for_stream(cap_cv *cap, cv::Mat* src, int dont_close) delete src;// cvReleaseImage(&src); int z = 0; for (z = 0; z < 20; ++z) { - get_capture_frame_cv(cap); + src = 
get_capture_frame_cv(cap); delete src;// cvReleaseImage(&src); } src = new cv::Mat(416, 416, CV_8UC(3)); // cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, 3); From 9fe201807e244398e8aeaeed43c8823491fd421b Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 12 Nov 2019 15:05:34 +0300 Subject: [PATCH 55/86] Revert to old Assisted Excitation version. --- src/convolutional_kernels.cu | 8 ++++---- src/parser.c | 10 ++++++++++ src/route_layer.c | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 1a6b5f8affb..e5d57eabde9 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -1019,8 +1019,8 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) for (t = 0; t < state.net.num_boxes; ++t) { box truth = float_to_box_stride(truth_cpu + t*(4 + 1) + b*l.truths, 1); if (!truth.x) break; // continue; - //float beta = 0; - float beta = 1 - alpha; // from 0 to 1 + float beta = 0; + //float beta = 1 - alpha; // from 0 to 1 float dw = (1 - truth.w) * beta; float dh = (1 - truth.h) * beta; //printf(" alpha = %f, beta = %f, truth.w = %f, dw = %f, tw+dw = %f, l.out_w = %d \n", alpha, beta, truth.w, dw, truth.w+dw, l.out_w); @@ -1052,9 +1052,9 @@ void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) //CHECK_CUDA(cudaPeekAtLastError()); // calc new output - assisted_activation2_gpu(1, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); // AE3: gt increases (beta = 1 - alpha = 0) + //assisted_activation2_gpu(1, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); // AE3: gt increases (beta = 1 - alpha = 0) //assisted_activation2_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); - //assisted_activation_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); + assisted_activation_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); //cudaStreamSynchronize(get_cuda_stream()); //CHECK_CUDA(cudaPeekAtLastError()); diff --git a/src/parser.c b/src/parser.c index 1e5ad78c781..4d259f43a96 100644 --- a/src/parser.c +++ b/src/parser.c @@ -797,6 +797,16 @@ route_layer parse_route(list *options, size_params params) } layer.out_c = layer.out_c / layer.groups; + layer.w = first.w; + layer.h = first.h; + layer.c = layer.out_c; + + if (n > 3) fprintf(stderr, " \t "); + else if (n > 1) fprintf(stderr, " \t "); + else fprintf(stderr, " \t\t "); + + fprintf(stderr, " -> %4d x%4d x%4d \n", layer.w, layer.h, layer.c, layer.out_w, layer.out_h, layer.out_c); + return layer; } diff --git a/src/route_layer.c b/src/route_layer.c index b636d4824e8..8e3f15f8a9d 100644 --- a/src/route_layer.c +++ b/src/route_layer.c @@ -20,10 +20,10 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz fprintf(stderr," %d", input_layers[i]); outputs += input_sizes[i]; } - fprintf(stderr, "\n"); outputs = outputs / groups; l.outputs = outputs; l.inputs = outputs; + //fprintf(stderr, " inputs = %d \t outputs = %d, groups = %d, group_id = %d \n", l.inputs, l.outputs, l.groups, l.group_id); l.delta = (float*)calloc(outputs * batch, sizeof(float)); l.output = (float*)calloc(outputs * batch, sizeof(float)); From ded620ac6b23443b102bd58897877b0ce9100fa4 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 12 Nov 2019 15:11:33 +0300 Subject: [PATCH 56/86] fixed memory deallocation --- include/darknet.h | 1 + src/convolutional_layer.c | 40 
+++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index 8be704061b1..0acac8a60b7 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -873,6 +873,7 @@ LIB_API void free_layer(layer); LIB_API void free_data(data d); LIB_API pthread_t load_data(load_args args); LIB_API pthread_t load_data_in_thread(load_args args); +LIB_API void *load_thread(void *ptr); // dark_cuda.h LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 7f2bac9690b..4dd2cf96f9b 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -337,28 +337,28 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference) void free_convolutional_batchnorm(convolutional_layer *l) { if (!l->share_layer) { - free(l->scales); - free(l->scale_updates); - free(l->mean); - free(l->variance); - free(l->mean_delta); - free(l->variance_delta); - free(l->rolling_mean); - free(l->rolling_variance); - free(l->x); - free(l->x_norm); + free(l->scales); l->scales = NULL; + free(l->scale_updates); l->scale_updates = NULL; + free(l->mean); l->mean = NULL; + free(l->variance); l->variance = NULL; + free(l->mean_delta); l->mean_delta = NULL; + free(l->variance_delta); l->variance_delta = NULL; + free(l->rolling_mean); l->rolling_mean = NULL; + free(l->rolling_variance); l->rolling_variance = NULL; + free(l->x); l->x = NULL; + free(l->x_norm); l->x_norm = NULL; #ifdef GPU - cuda_free(l->scales_gpu); - cuda_free(l->scale_updates_gpu); - cuda_free(l->mean_gpu); - cuda_free(l->variance_gpu); - cuda_free(l->mean_delta_gpu); - cuda_free(l->variance_delta_gpu); - cuda_free(l->rolling_mean_gpu); - cuda_free(l->rolling_variance_gpu); - cuda_free(l->x_gpu); - cuda_free(l->x_norm_gpu); + cuda_free(l->scales_gpu); l->scales_gpu = NULL; + cuda_free(l->scale_updates_gpu); l->scale_updates_gpu = NULL; + cuda_free(l->mean_gpu); l->mean_gpu = NULL; + cuda_free(l->variance_gpu); l->variance_gpu = NULL; + cuda_free(l->mean_delta_gpu); l->mean_delta_gpu = NULL; + cuda_free(l->variance_delta_gpu); l->variance_delta_gpu = NULL; + cuda_free(l->rolling_mean_gpu); l->rolling_mean_gpu = NULL; + cuda_free(l->rolling_variance_gpu); l->rolling_variance_gpu = NULL; + cuda_free(l->x_gpu); l->x_gpu = NULL; + cuda_free(l->x_norm_gpu); l->x_norm_gpu = NULL; #endif } } From 70bf88ce01950e93f8bd54b53c585b98bae1146a Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 12 Nov 2019 15:21:21 +0300 Subject: [PATCH 57/86] Fixed swish-activation for [shortcut_layer] --- src/convolutional_layer.c | 2 +- src/shortcut_layer.c | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 4dd2cf96f9b..92e72732c1c 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -513,7 +513,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, if (l.activation == SWISH || l.activation == MISH) l.activation_input = (float*)calloc(total_batch*l.outputs, sizeof(float)); #ifdef GPU - if (l.activation == SWISH || l.activation == MISH) l.activation_input_gpu = cuda_make_array(l.activation_input, total_batch*out_h*out_w*n); + if (l.activation == SWISH || l.activation == MISH) l.activation_input_gpu = cuda_make_array(l.activation_input, total_batch*l.outputs); l.forward_gpu = forward_convolutional_layer_gpu; l.backward_gpu = backward_convolutional_layer_gpu; diff --git 
a/src/shortcut_layer.c b/src/shortcut_layer.c index 615a5ea384b..a1dd3724e47 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -2,6 +2,7 @@ #include "convolutional_layer.h" #include "dark_cuda.h" #include "blas.h" +#include "gemm.h" #include #include @@ -33,7 +34,11 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.forward = forward_shortcut_layer; l.backward = backward_shortcut_layer; + + if (l.activation == SWISH || l.activation == MISH) l.activation_input = (float*)calloc(l.batch*l.outputs, sizeof(float)); #ifdef GPU + if (l.activation == SWISH || l.activation == MISH) l.activation_input_gpu = cuda_make_array(l.activation_input, l.batch*l.outputs); + l.forward_gpu = forward_shortcut_layer_gpu; l.backward_gpu = backward_shortcut_layer_gpu; @@ -85,14 +90,21 @@ void forward_shortcut_layer(const layer l, network_state state) copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output); } - activate_array(l.output, l.outputs*l.batch, l.activation); + + //activate_array(l.output, l.outputs*l.batch, l.activation); + if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output); + else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output); + else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation); if (l.assisted_excitation && state.train) assisted_excitation_forward(l, state); } void backward_shortcut_layer(const layer l, network_state state) { - gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + if (l.activation == SWISH) gradient_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.delta); + else if (l.activation == MISH) gradient_array_mish(l.outputs*l.batch, l.activation_input, l.delta); + else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); + axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1); shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); } @@ -104,14 +116,20 @@ void forward_shortcut_layer_gpu(const layer l, network_state state) //simple_copy_ongpu(l.outputs*l.batch, state.input, l.output_gpu); //shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); input_shortcut_gpu(state.input, l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); - activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); + + if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); + else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); + else activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); if (l.assisted_excitation && state.train) assisted_excitation_forward_gpu(l, state); } void backward_shortcut_layer_gpu(const layer l, network_state state) { - gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); + else if (l.activation == MISH) gradient_array_mish_ongpu(l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); + else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); + 
axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1); shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu); } From 3652d7d3745a29ab161cf3a77492f1ac8b04b090 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 12 Nov 2019 15:31:07 +0300 Subject: [PATCH 58/86] Fixed MISH instability --- src/activation_kernels.cu | 24 ++++++++++++++++++------ src/activations.c | 23 ++++++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index d8ff25f42e3..67504e71611 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -275,15 +275,27 @@ __global__ void gradient_array_swish_kernel(float *x, int n, float *sigmoid_gpu, } // https://github.com/digantamisra98/Mish -__global__ void gradient_array_mish_kernel(int n, float *activation_input, float *delta) +__global__ void gradient_array_mish_kernel(int n, float *activation_input_gpu, float *delta) { int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (i < n) { - float x = activation_input[i]; - float d = 2 * expf(x) + expf(2 * x) + 2; - float w = 4 * (x + 1) + 4 * expf(2 * x) + expf(3 * x) + expf(x)*(4 * x + 6); - float derivative = expf(x) * w / (d * d); - delta[i] *= derivative; + const float THRESHOLD = 20.0f; + + // implementation from TensorFlow: https://github.com/tensorflow/addons/commit/093cdfa85d334cbe19a37624c33198f3140109ed + // implementation from Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L26-L31 + float inp = activation_input_gpu[i]; + const float sp = (inp < THRESHOLD) ? log1p(exp(inp)) : inp; + const float grad_sp = 1 - exp(-sp); + const float tsp = tanh(sp); + const float grad_tsp = (1 - tsp*tsp) * grad_sp; + const float grad = inp * grad_tsp + tsp; + delta[i] *= grad; + + //float x = activation_input[i]; + //float d = 2 * expf(x) + expf(2 * x) + 2; + //float w = 4 * (x + 1) + 4 * expf(2 * x) + expf(3 * x) + expf(x)*(4 * x + 6); + //float derivative = expf(x) * w / (d * d); + //delta[i] *= derivative; } } diff --git a/src/activations.c b/src/activations.c index da92af0af38..55b060bd94c 100644 --- a/src/activations.c +++ b/src/activations.c @@ -207,10 +207,23 @@ void gradient_array_mish(const int n, const float * activation_input, float * de int i; #pragma omp parallel for for (i = 0; i < n; ++i) { - float x = activation_input[i]; - float d = 2 * expf(x) + expf(2 * x) + 2; - float w = 4 * (x + 1) + 4 * expf(2 * x) + expf(3 * x) + expf(x)*(4 * x + 6); - float derivative = expf(x) * w / (d * d); - delta[i] *= derivative; + const float THRESHOLD = 20.0f; + + // implementation from TensorFlow: https://github.com/tensorflow/addons/commit/093cdfa85d334cbe19a37624c33198f3140109ed + // implementation from Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L26-L31 + float inp = activation_input[i]; + const float sp = (inp < THRESHOLD) ? 
log1p(exp(inp)) : inp; + const float grad_sp = 1 - exp(-sp); + const float tsp = tanh(sp); + const float grad_tsp = (1 - tsp*tsp) * grad_sp; + const float grad = inp * grad_tsp + tsp; + delta[i] *= grad; + + + //float x = activation_input[i]; + //float d = 2 * expf(x) + expf(2 * x) + 2; + //float w = 4 * (x + 1) + 4 * expf(2 * x) + expf(3 * x) + expf(x)*(4 * x + 6); + //float derivative = expf(x) * w / (d * d); + //delta[i] *= derivative; } } From c516b6cb0a08f82023067a649d10238ff18cf1e1 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 12 Nov 2019 20:20:28 +0300 Subject: [PATCH 59/86] Take TopK from obj.data file for Classifier --- src/classifier.c | 9 ++++++--- src/parser.c | 13 ++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/classifier.c b/src/classifier.c index 11fa92df913..86549ed8f40 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -63,6 +63,9 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, char *label_list = option_find_str(options, "labels", "data/labels.list"); char *train_list = option_find_str(options, "train", "data/train.list"); int classes = option_find_int(options, "classes", 2); + int topk_data = option_find_int(options, "top", 5); + char topk_buff[10]; + sprintf(topk_buff, "top%d", topk_data); char **labels = get_labels(label_list); list *plist = get_paths(train_list); @@ -157,14 +160,14 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int draw_precision = 0; if (calc_topk && (i >= calc_topk_for_each || i == net.max_batches)) { iter_topk = i; - topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, 5); // calc TOP5 - printf("\n accuracy TOP5 = %f \n", topk); + topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP5 + printf("\n accuracy %s = %f \n", topk_buff, topk); draw_precision = 1; } printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); #ifdef OPENCV - draw_train_loss(img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, "top5", dont_show, mjpeg_port); + draw_train_loss(img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port); #endif // OPENCV if (i >= (iter_save + 1000)) { diff --git a/src/parser.c b/src/parser.c index 4d259f43a96..e47c2097526 100644 --- a/src/parser.c +++ b/src/parser.c @@ -801,11 +801,14 @@ route_layer parse_route(list *options, size_params params) layer.h = first.h; layer.c = layer.out_c; - if (n > 3) fprintf(stderr, " \t "); - else if (n > 1) fprintf(stderr, " \t "); - else fprintf(stderr, " \t\t "); - - fprintf(stderr, " -> %4d x%4d x%4d \n", layer.w, layer.h, layer.c, layer.out_w, layer.out_h, layer.out_c); + if (n > 3) fprintf(stderr, " \t "); + else if (n > 1) fprintf(stderr, " \t "); + else fprintf(stderr, " \t\t "); + + fprintf(stderr, " "); + if (layer.groups > 1) fprintf(stderr, "%d/%d", layer.group_id, layer.groups); + else fprintf(stderr, " "); + fprintf(stderr, " -> %4d x%4d x%4d \n", layer.out_w, layer.out_h, layer.out_c); return layer; } From 11142d00bedbafb015991fb20a05a5eb048200d6 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Wed, 13 Nov 2019 20:58:53 +0300 Subject: [PATCH 60/86] Fixed non-square network for Training Classifier --- src/classifier.c | 1 + src/convolutional_layer.c | 1 + src/data.c | 36 ++++++++++++++++++++---------------- 
src/data.h | 6 +++--- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/classifier.c b/src/classifier.c index 86549ed8f40..c5f8e2f18ca 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -77,6 +77,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, load_args args = {0}; args.w = net.w; args.h = net.h; + args.c = net.c; args.threads = 32; args.hierarchy = net.hierarchy; diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 92e72732c1c..f9d66ebf3b6 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -609,6 +609,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); l.bflops = (2.0 * l.nweights * l.out_h*l.out_w) / 1000000000.; + if (l.xnor) l.bflops = l.bflops / 32; if (l.xnor && l.use_bin_output) fprintf(stderr, "convXB"); else if (l.xnor) fprintf(stderr, "convX "); else if (l.share_layer) fprintf(stderr, "convS "); diff --git a/src/data.c b/src/data.c index 622e401ef75..f4b64b998e3 100644 --- a/src/data.c +++ b/src/data.c @@ -142,7 +142,7 @@ matrix load_image_paths(char **paths, int n, int w, int h) return X; } -matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure) { int i; matrix X; @@ -151,6 +151,7 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int X.cols = 0; for(i = 0; i < n; ++i){ + int size = w > h ? w : h; image im = load_image_color(paths[i], 0, 0); image crop = random_augment_image(im, angle, aspect, min, max, size); int flip = use_flip ? 
random_gen() % 2 : 0; @@ -158,14 +159,17 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int flip_image(crop); random_distort_image(crop, hue, saturation, exposure); - /* - show_image(im, "orig"); - show_image(crop, "crop"); - cvWaitKey(0); - */ + image sized = resize_image(crop, w, h); + + //show_image(im, "orig"); + //show_image(sized, "sized"); + //show_image(sized, paths[i]); + //wait_until_press_key_cv(); + //printf("w = %d, h = %d \n", sized.w, sized.h); + free_image(im); - X.vals[i] = crop.data; - X.cols = crop.h*crop.w*crop.c; + X.vals[i] = sized.data; + X.cols = sized.h*sized.w*sized.c; } return X; } @@ -1165,7 +1169,7 @@ void *load_thread(void *ptr) if (a.type == OLD_CLASSIFICATION_DATA){ *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); } else if (a.type == CLASSIFICATION_DATA){ - *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure); } else if (a.type == SUPER_DATA){ *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); } else if (a.type == WRITING_DATA){ @@ -1186,7 +1190,7 @@ void *load_thread(void *ptr) *(a.im) = load_image(a.path, 0, 0, a.c); *(a.resized) = letterbox_image(*(a.im), a.w, a.h); } else if (a.type == TAG_DATA){ - *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); + *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure); } free(ptr); return 0; @@ -1310,25 +1314,25 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale) return d; } -data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure) { if(m) paths = get_random_paths(paths, n, m); data d = {0}; d.shallow = 0; - d.X = load_image_augment_paths(paths, n, use_flip, min, max, size, angle, aspect, hue, saturation, exposure); + d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); d.y = load_labels_paths(paths, n, labels, k, hierarchy); if(m) free(paths); return d; } -data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) +data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure) { if(m) paths = get_random_paths(paths, n, m); data d = {0}; - d.w = size; - d.h = size; + d.w = w; + d.h = h; d.shallow = 0; - d.X = load_image_augment_paths(paths, n, use_flip, min, max, size, angle, aspect, hue, saturation, exposure); + d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); d.y = load_tags_paths(paths, n, k); if(m) free(paths); return d; diff --git a/src/data.h b/src/data.h index 17edc5cf468..250e655a9b5 100644 --- a/src/data.h +++ 
b/src/data.h
@@ -88,10 +88,10 @@ data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
 data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
 data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs);
-data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
-matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
+data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
+matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
 data load_data_super(char **paths, int n, int m, int w, int h, int scale);
-data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
+data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
 data load_go(char *filename);
 box_label *read_boxes(char *filename, int *n);

From a4012895c16aacf82416b3383b567fc143532c51 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Wed, 13 Nov 2019 21:12:30 +0300
Subject: [PATCH 61/86] Accelerated the C++ example of DLL/SO library usage for images when compiled with OpenCV

---
 src/yolo_console_dll.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp
index 60da53f6516..f942fe13e3d 100644
--- a/src/yolo_console_dll.cpp
+++ b/src/yolo_console_dll.cpp
@@ -650,10 +650,12 @@ int main(int argc, char *argv[])
             }
             else {    // image file
+                // to achieve high performance for multiple images do these 2 lines in another thread
                 cv::Mat mat_img = cv::imread(filename);
+                auto det_image = detector.mat_to_image_resize(mat_img);

                 auto start = std::chrono::steady_clock::now();
-                std::vector<bbox_t> result_vec = detector.detect(mat_img);
+                std::vector<bbox_t> result_vec = detector.detect_resized(*det_image, mat_img.size().width, mat_img.size().height);
                 auto end = std::chrono::steady_clock::now();
                 std::chrono::duration<double> spent = end - start;
                 std::cout << " Time: " << spent.count() << " sec \n";

From f42923350b9e9eeba372ec9792a2d162d31b0016 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Thu, 14 Nov 2019 03:08:35 +0300
Subject: [PATCH 62/86] Minor fix in batchnorm_layer (side effect in python samples)

---
 src/batchnorm_layer.c | 4 ++--
 src/parser.c | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c
index 4f9536a99ce..cbcc14d85e4 100644
--- a/src/batchnorm_layer.c
+++ b/src/batchnorm_layer.c
@@ -182,9 +182,9 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
     if (l.type == BATCHNORM) simple_copy_ongpu(l.outputs*l.batch, state.input, l.output_gpu);
     //copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1);
- 
simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.x_gpu); - //copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); if (state.train) { + simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.x_gpu); + //copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); #ifdef CUDNN float one = 1; float zero = 0; diff --git a/src/parser.c b/src/parser.c index e47c2097526..b05a293f786 100644 --- a/src/parser.c +++ b/src/parser.c @@ -978,6 +978,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) params.batch = net.batch; params.time_steps = net.time_steps; params.net = net; + printf("batch = %d, time_steps = %d, train = %d \n", net.batch, net.time_steps, params.train); float bflops = 0; size_t workspace_size = 0; From 509ba13acf9b6738d9838eb1553a2f40e36c0c2e Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 14 Nov 2019 14:14:54 +0300 Subject: [PATCH 63/86] Fixed recent memory leak for Classifier --- src/data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/data.c b/src/data.c index f4b64b998e3..d15aca09b8c 100644 --- a/src/data.c +++ b/src/data.c @@ -168,6 +168,7 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int //printf("w = %d, h = %d \n", sized.w, sized.h); free_image(im); + free_image(crop); X.vals[i] = sized.data; X.cols = sized.h*sized.w*sized.c; } From ee370e765d3de505df5657f33dde605625214d0b Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 14 Nov 2019 23:18:21 +0300 Subject: [PATCH 64/86] Fixed ignore_thresh --- src/yolo_layer.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 06d2b513624..42e595c1fcf 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -129,23 +129,18 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw } -int get_yolo_class(float *output, int classes, int class_index, int stride, float objectness) +int compare_yolo_class(float *output, int classes, int class_index, int stride, float objectness, int class_id) { - int class_id = 0; - float max_prob = FLT_MIN; + const float conf_thresh = 0.25; int j; for (j = 0; j < classes; ++j) { float prob = objectness * output[class_index + stride*j]; - if (prob > max_prob) { - max_prob = prob; - class_id = j; + if (prob > conf_thresh) { + return 1; } - //int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); - //float prob = objectness*predictions[class_index]; - //dets[count].prob[j] = (prob > thresh) ? 
prob : 0; } - return class_id; + return 0; } ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss) @@ -280,6 +275,8 @@ void forward_yolo_layer(const layer l, network_state state) for (n = 0; n < l.n; ++n) { int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h); + float best_match_iou = 0; + int best_match_t = 0; float best_iou = 0; int best_t = 0; for (t = 0; t < l.max_boxes; ++t) { @@ -296,14 +293,14 @@ void forward_yolo_layer(const layer l, network_state state) int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); float objectness = l.output[obj_index]; - int pred_class_id = get_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness); - int class_id_match = 0; - if (class_id == pred_class_id) class_id_match = 1; - else class_id_match = 0; + int class_id_match = compare_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness, class_id); float iou = box_iou(pred, truth); - //if (iou > best_iou) { - if (iou > best_iou && class_id_match == 1) { + if (iou > best_match_iou && class_id_match == 1) { + best_match_iou = iou; + best_match_t = t; + } + if (iou > best_iou) { best_iou = iou; best_t = t; } @@ -311,7 +308,7 @@ void forward_yolo_layer(const layer l, network_state state) int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); avg_anyobj += l.output[obj_index]; l.delta[obj_index] = l.cls_normalizer * (0 - l.output[obj_index]); - if (best_iou > l.ignore_thresh) { + if (best_match_iou > l.ignore_thresh) { l.delta[obj_index] = 0; } if (best_iou > l.truth_thresh) { @@ -376,9 +373,6 @@ void forward_yolo_layer(const layer l, network_state state) ++count; ++class_count; - //if(iou > .5) recall += 1; - //if(iou > .75) recall75 += 1; - //avg_iou += iou; if (all_ious.iou > .5) recall += 1; if (all_ious.iou > .75) recall75 += 1; } From dd34fe156a1a03709b3444bd84d6ccf55f03c835 Mon Sep 17 00:00:00 2001 From: dccho Date: Thu, 14 Nov 2019 15:36:22 -0500 Subject: [PATCH 65/86] enable random resize training for efficient net --- src/convolutional_layer.c | 4 ++++ src/dropout_layer.c | 3 ++- src/network.c | 21 ++++++++++++++++----- src/scale_channels_layer.c | 7 ++++--- src/scale_channels_layer.h | 2 +- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index f9d66ebf3b6..e784f5e4b89 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -745,6 +745,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) //l->binary_input = realloc(l->inputs*l->batch, sizeof(float)); } + if (l->activation == SWISH || l->activation == MISH) l->activation_input = (float*)realloc(l->activation_input, total_batch*l->outputs * sizeof(float)); #ifdef GPU if (old_w < w || old_h < h) { if (l->train) { @@ -767,6 +768,9 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) cuda_free(l->binary_input_gpu); l->binary_input_gpu = cuda_make_array(0, l->inputs*l->batch); } + + cuda_free(l->activation_input_gpu); + if (l->activation == SWISH || l->activation == MISH) l->activation_input_gpu = cuda_make_array(l->activation_input, total_batch*l->outputs); } #ifdef CUDNN cudnn_convolutional_setup(l, cudnn_fastest); diff --git a/src/dropout_layer.c 
b/src/dropout_layer.c index 0d34ed24b2c..c32c5c616bc 100644 --- a/src/dropout_layer.c +++ b/src/dropout_layer.c @@ -27,11 +27,12 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability) void resize_dropout_layer(dropout_layer *l, int inputs) { + l->inputs = l->outputs = inputs; l->rand = (float*)realloc(l->rand, l->inputs * l->batch * sizeof(float)); #ifdef GPU cuda_free(l->rand_gpu); - l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); + l->rand_gpu = cuda_make_array(l->rand, l->inputs*l->batch); #endif } diff --git a/src/network.c b/src/network.c index c2249a54df6..96c935d9ce2 100644 --- a/src/network.c +++ b/src/network.c @@ -535,8 +535,16 @@ int resize_network(network *net, int w, int h) resize_route_layer(&l, net); }else if (l.type == SHORTCUT) { resize_shortcut_layer(&l, w, h); - //}else if (l.type == SCALE_CHANNELS) { - // resize_scale_channels_layer(&l, w, h); + }else if (l.type == SCALE_CHANNELS) { + resize_scale_channels_layer(&l, net); + }else if (l.type == DROPOUT) { + resize_dropout_layer(&l, inputs); + l.output = net->layers[i - 1].output; + l.delta = net->layers[i - 1].delta; +#ifdef GPU + l.output_gpu = net->layers[i-1].output_gpu; + l.delta_gpu = net->layers[i-1].delta_gpu; +#endif }else if (l.type == UPSAMPLE) { resize_upsample_layer(&l, w, h); }else if(l.type == REORG){ @@ -556,9 +564,12 @@ int resize_network(network *net, int w, int h) if(l.workspace_size > workspace_size) workspace_size = l.workspace_size; inputs = l.outputs; net->layers[i] = l; - w = l.out_w; - h = l.out_h; - if(l.type == AVGPOOL) break; + if(l.type != DROPOUT) + { + w = l.out_w; + h = l.out_h; + } + //if(l.type == AVGPOOL) break; } #ifdef GPU const int size = get_network_input_size(*net) * net->batch; diff --git a/src/scale_channels_layer.c b/src/scale_channels_layer.c index 80be5361126..bcb54c1b497 100644 --- a/src/scale_channels_layer.c +++ b/src/scale_channels_layer.c @@ -39,10 +39,11 @@ layer make_scale_channels_layer(int batch, int index, int w, int h, int c, int w return l; } -void resize_scale_channels_layer(layer *l, int w, int h) +void resize_scale_channels_layer(layer *l, network *net) { - l->out_w = w; - l->out_h = h; + layer first = net->layers[l->index]; + l->out_w = first.out_w; + l->out_h = first.out_h; l->outputs = l->out_w*l->out_h*l->out_c; l->inputs = l->outputs; l->delta = (float*)realloc(l->delta, l->outputs * l->batch * sizeof(float)); diff --git a/src/scale_channels_layer.h b/src/scale_channels_layer.h index a20c070397b..fdaa4b924fe 100644 --- a/src/scale_channels_layer.h +++ b/src/scale_channels_layer.h @@ -10,7 +10,7 @@ extern "C" { layer make_scale_channels_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); void forward_scale_channels_layer(const layer l, network_state state); void backward_scale_channels_layer(const layer l, network_state state); -void resize_scale_channels_layer(layer *l, int w, int h); +void resize_scale_channels_layer(layer *l, network *net); #ifdef GPU void forward_scale_channels_layer_gpu(const layer l, network_state state); From e7e85b358cb0531f7154fdd68306c4c4dc96b5d2 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 15 Nov 2019 01:14:55 +0300 Subject: [PATCH 66/86] Added delta_yolo_box_accumulate(). Added iou_thresh=0.213 parameter to [yolo] layer IoU(anchor,truth). 
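This relaxes anchor assignment during training. Previously each ground-truth box produced deltas only for the single best-matching anchor, and delta_yolo_box() overwrote whatever delta was already stored at that location; delta_yolo_box_accumulate() sums contributions (+=) instead, so one cell/anchor can receive gradient from several truth boxes. In addition, every anchor of the layer whose shape-IoU with the truth exceeds iou_thresh is now trained as well. In outline, condensed from the yolo_layer.c hunk below (truth_shift is the truth box with x = y = 0, so only width/height overlap counts):

    // after the single best_n anchor has been handled for this truth box:
    for (n = 0; n < l.total; ++n) {
        int mask_n = int_index(l.mask, n, l.n);         // is anchor n used by this layer?
        if (mask_n >= 0 && n != best_n) {
            box pred = { 0 };
            pred.w = l.biases[2 * n] / state.net.w;     // compare anchor shape only,
            pred.h = l.biases[2 * n + 1] / state.net.h; // position is ignored
            if (box_iou(pred, truth_shift) > l.iou_thresh) {
                // accumulate box, objectness and class deltas for this anchor too
            }
        }
    }

With the recommended iou_thresh=0.213 a ground-truth box can therefore train several anchors per cell instead of exactly one; with the default iou_thresh=1 the old single-anchor behaviour is kept.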
--- include/darknet.h | 1 + src/parser.c | 1 + src/yolo_layer.c | 100 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index 0acac8a60b7..2bd70d1cde3 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -280,6 +280,7 @@ struct layer { int random; float ignore_thresh; float truth_thresh; + float iou_thresh; float thresh; float focus; int classfix; diff --git a/src/parser.c b/src/parser.c index b05a293f786..472b179d20d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -380,6 +380,7 @@ layer parse_yolo(list *options, size_params params) l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo] l.random = option_find_int_quiet(options, "random", 0); char *map_file = option_find_str(options, "map", 0); diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 42e595c1fcf..6c02310617a 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -193,6 +193,63 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, return all_ious; } +ious delta_yolo_box_accumulate(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss) +{ + ious all_ious = { 0 }; + // i - step in layer width + // j - step in layer height + // Returns a box in absolute coordinates + box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + all_ious.iou = box_iou(pred, truth); + all_ious.giou = box_giou(pred, truth); + // avoid nan in dx_box_iou + if (pred.w == 0) { pred.w = 1.0; } + if (pred.h == 0) { pred.h = 1.0; } + if (iou_loss == MSE) // old loss + { + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2 * n]); + float th = log(truth.h*h / biases[2 * n + 1]); + + // accumulate delta + delta[index + 0 * stride] += scale * (tx - x[index + 0 * stride]); + delta[index + 1 * stride] += scale * (ty - x[index + 1 * stride]); + delta[index + 2 * stride] += scale * (tw - x[index + 2 * stride]); + delta[index + 3 * stride] += scale * (th - x[index + 3 * stride]); + } + else { + // https://github.com/generalized-iou/g-darknet + // https://arxiv.org/abs/1902.09630v2 + // https://giou.stanford.edu/ + all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); + + // jacobian^t (transpose) + float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); + float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); + float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); + float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); + + // predict exponential, apply gradient of e^delta_t ONLY for w,h + dw *= exp(x[index + 2 * stride]); + dh *= exp(x[index + 3 * stride]); + + // normalize iou weight + dx *= iou_normalizer; + dy *= iou_normalizer; + dw *= iou_normalizer; + dh *= iou_normalizer; + + // accumulate delta + delta[index + 0 * stride] += dx; + delta[index + 1 * stride] += dy; + delta[index + 2 * stride] += dw; + delta[index + 3 * stride] += dh; + } + + return all_ious; +} + void delta_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat, int focal_loss) { int n; @@ -254,6 +311,7 @@ void forward_yolo_layer(const layer l, network_state state) } #endif + // delta is zeroed memset(l.delta, 
0, l.outputs * l.batch * sizeof(float)); if (!state.train) return; //float avg_iou = 0; @@ -319,7 +377,7 @@ void forward_yolo_layer(const layer l, network_state state) int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); + delta_yolo_box_accumulate(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); } } } @@ -353,7 +411,7 @@ void forward_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if (mask_n >= 0) { int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); + ious all_ious = delta_yolo_box_accumulate(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); // range is 0 <= 1 tot_iou += all_ious.iou; @@ -376,6 +434,44 @@ void forward_yolo_layer(const layer l, network_state state) if (all_ious.iou > .5) recall += 1; if (all_ious.iou > .75) recall75 += 1; } + + // iou_thresh + for (n = 0; n < l.total; ++n) { + int mask_n = int_index(l.mask, n, l.n); + if (mask_n >= 0 && n != best_n) { + box pred = { 0 }; + pred.w = l.biases[2 * n] / state.net.w; + pred.h = l.biases[2 * n + 1] / state.net.h; + float iou = box_iou(pred, truth_shift); + // iou, n + + if (iou > l.iou_thresh) { + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); + ious all_ious = delta_yolo_box_accumulate(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); + + // range is 0 <= 1 + tot_iou += all_ious.iou; + tot_iou_loss += 1 - all_ious.iou; + // range is -1 <= giou <= 1 + tot_giou += all_ious.giou; + tot_giou_loss += 1 - all_ious.giou; + + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); + + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; + int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); + delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.focal_loss); + + ++count; + ++class_count; + if (all_ious.iou > .5) recall += 1; + if (all_ious.iou > .75) recall75 += 1; + } + } + } } } //*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); From 3555beb91401cf746fd357b4029231143d00f58f Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 15 Nov 2019 22:51:06 +0300 Subject: [PATCH 67/86] Fixed some memory leaks in secondary functions --- src/classifier.c | 2 ++ src/coco.c | 10 ++++++- src/detector.c | 17 ++++++++++- src/gemm.c | 68 ++++++++++++++++++++++---------------------- src/go.c | 1 + src/image_opencv.cpp | 4 ++- src/network.c | 7 ++++- src/utils.c | 4 ++- src/yolo.c | 8 ++++++ 9 files changed, 82 insertions(+), 39 deletions(-) diff --git a/src/classifier.c 
b/src/classifier.c index c5f8e2f18ca..c077f61ae65 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -1288,4 +1288,6 @@ void run_classifier(int argc, char **argv) else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); + + if (gpus && gpu_list && ngpus > 1) free(gpus); } diff --git a/src/coco.c b/src/coco.c index cdfd3dff391..03dd3a61415 100644 --- a/src/coco.c +++ b/src/coco.c @@ -226,6 +226,12 @@ void validate_coco(char *cfgfile, char *weightfile) fprintf(fp, "\n]\n"); fclose(fp); + if (val) free(val); + if (val_resized) free(val_resized); + if (buf) free(buf); + if (buf_resized) free(buf_resized); + if (thr) free(thr); + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); } @@ -307,7 +313,9 @@ void validate_coco_recall(char *cfgfile, char *weightfile) } fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); - free(id); + + if (fps) free(fps); + if (id) free(id); free_image(orig); free_image(sized); } diff --git a/src/detector.c b/src/detector.c index efe5571aa8d..7f32c4fa7a6 100644 --- a/src/detector.c +++ b/src/detector.c @@ -556,6 +556,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out for (j = 0; j < classes; ++j) { if (fps) fclose(fps[j]); } + if (fps) free(fps); if (coco) { #ifdef WIN32 fseek(fp, -3, SEEK_CUR); @@ -563,8 +564,15 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out fseek(fp, -2, SEEK_CUR); #endif fprintf(fp, "\n]\n"); - fclose(fp); } + if (fp) fclose(fp); + + if (val) free(val); + if (val_resized) free(val_resized); + if (thr) free(thr); + if (buf) free(buf); + if (buf_resized) free(buf_resized); + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)time(0) - start); } @@ -1099,6 +1107,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa else { free_network(net); } + if (val) free(val); + if (val_resized) free(val_resized); + if (thr) free(thr); + if (buf) free(buf); + if (buf_resized) free(buf_resized); return mean_average_precision; } @@ -1505,4 +1518,6 @@ void run_detector(int argc, char **argv) free_list(options); } else printf(" There isn't such command: %s", argv[2]); + + if (gpus && gpu_list && ngpus > 1) free(gpus); } diff --git a/src/gemm.c b/src/gemm.c index 9f5cb882c61..51f77cac7a9 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -324,7 +324,7 @@ void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb) unsigned int x, y; for (y = 0; y < 32; ++y) { for (x = 0; x < 32; ++x) { - if (A[y * lda] & (1 << x)) B[x * ldb] |= (uint32_t)1 << y; + if (A[y * lda] & ((uint32_t)1 << x)) B[x * ldb] |= (uint32_t)1 << y; } } } @@ -636,48 +636,48 @@ void check_cpu_features(void) { // Detect Features if (nIds >= 0x00000001) { cpuid(info, 0x00000001); - HW_MMX = (info[3] & ((int)1 << 23)) != 0; - HW_SSE = (info[3] & ((int)1 << 25)) != 0; - HW_SSE2 = (info[3] & ((int)1 << 26)) != 0; - HW_SSE3 = (info[2] & ((int)1 << 0)) != 0; + HW_MMX = (info[3] & ((uint32_t)1 << 23)) != 0; + HW_SSE = (info[3] & ((uint32_t)1 << 25)) != 0; + HW_SSE2 = (info[3] & ((uint32_t)1 << 26)) != 0; + HW_SSE3 = (info[2] & ((uint32_t)1 << 0)) != 0; - HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0; - HW_SSE41 = (info[2] & ((int)1 << 19)) != 0; - HW_SSE42 = 
(info[2] & ((int)1 << 20)) != 0; - HW_AES = (info[2] & ((int)1 << 25)) != 0; + HW_SSSE3 = (info[2] & ((uint32_t)1 << 9)) != 0; + HW_SSE41 = (info[2] & ((uint32_t)1 << 19)) != 0; + HW_SSE42 = (info[2] & ((uint32_t)1 << 20)) != 0; + HW_AES = (info[2] & ((uint32_t)1 << 25)) != 0; - HW_AVX = (info[2] & ((int)1 << 28)) != 0; - HW_FMA3 = (info[2] & ((int)1 << 12)) != 0; + HW_AVX = (info[2] & ((uint32_t)1 << 28)) != 0; + HW_FMA3 = (info[2] & ((uint32_t)1 << 12)) != 0; - HW_RDRAND = (info[2] & ((int)1 << 30)) != 0; + HW_RDRAND = (info[2] & ((uint32_t)1 << 30)) != 0; } if (nIds >= 0x00000007) { cpuid(info, 0x00000007); - HW_AVX2 = (info[1] & ((int)1 << 5)) != 0; - - HW_BMI1 = (info[1] & ((int)1 << 3)) != 0; - HW_BMI2 = (info[1] & ((int)1 << 8)) != 0; - HW_ADX = (info[1] & ((int)1 << 19)) != 0; - HW_SHA = (info[1] & ((int)1 << 29)) != 0; - HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0; - - HW_AVX512F = (info[1] & ((int)1 << 16)) != 0; - HW_AVX512CD = (info[1] & ((int)1 << 28)) != 0; - HW_AVX512PF = (info[1] & ((int)1 << 26)) != 0; - HW_AVX512ER = (info[1] & ((int)1 << 27)) != 0; - HW_AVX512VL = (info[1] & ((int)1 << 31)) != 0; - HW_AVX512BW = (info[1] & ((int)1 << 30)) != 0; - HW_AVX512DQ = (info[1] & ((int)1 << 17)) != 0; - HW_AVX512IFMA = (info[1] & ((int)1 << 21)) != 0; - HW_AVX512VBMI = (info[2] & ((int)1 << 1)) != 0; + HW_AVX2 = (info[1] & ((uint32_t)1 << 5)) != 0; + + HW_BMI1 = (info[1] & ((uint32_t)1 << 3)) != 0; + HW_BMI2 = (info[1] & ((uint32_t)1 << 8)) != 0; + HW_ADX = (info[1] & ((uint32_t)1 << 19)) != 0; + HW_SHA = (info[1] & ((uint32_t)1 << 29)) != 0; + HW_PREFETCHWT1 = (info[2] & ((uint32_t)1 << 0)) != 0; + + HW_AVX512F = (info[1] & ((uint32_t)1 << 16)) != 0; + HW_AVX512CD = (info[1] & ((uint32_t)1 << 28)) != 0; + HW_AVX512PF = (info[1] & ((uint32_t)1 << 26)) != 0; + HW_AVX512ER = (info[1] & ((uint32_t)1 << 27)) != 0; + HW_AVX512VL = (info[1] & ((uint32_t)1 << 31)) != 0; + HW_AVX512BW = (info[1] & ((uint32_t)1 << 30)) != 0; + HW_AVX512DQ = (info[1] & ((uint32_t)1 << 17)) != 0; + HW_AVX512IFMA = (info[1] & ((uint32_t)1 << 21)) != 0; + HW_AVX512VBMI = (info[2] & ((uint32_t)1 << 1)) != 0; } if (nExIds >= 0x80000001) { cpuid(info, 0x80000001); - HW_x64 = (info[3] & ((int)1 << 29)) != 0; - HW_ABM = (info[2] & ((int)1 << 5)) != 0; - HW_SSE4a = (info[2] & ((int)1 << 6)) != 0; - HW_FMA4 = (info[2] & ((int)1 << 16)) != 0; - HW_XOP = (info[2] & ((int)1 << 11)) != 0; + HW_x64 = (info[3] & ((uint32_t)1 << 29)) != 0; + HW_ABM = (info[2] & ((uint32_t)1 << 5)) != 0; + HW_SSE4a = (info[2] & ((uint32_t)1 << 6)) != 0; + HW_FMA4 = (info[2] & ((uint32_t)1 << 16)) != 0; + HW_XOP = (info[2] & ((uint32_t)1 << 11)) != 0; } } diff --git a/src/go.c b/src/go.c index 5d507768e50..88da6c0d14d 100644 --- a/src/go.c +++ b/src/go.c @@ -47,6 +47,7 @@ moves load_go_moves(char *filename) printf("%d\n", count); m.n = count; m.data = (char**)realloc(m.data, count * sizeof(char*)); + fclose(fp); return m; } diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index 6951fb9a80e..0a8ccd9841e 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -703,11 +703,12 @@ int set_capture_position_frame_cv(cap_cv *cap, int index) image get_image_from_stream_cpp(cap_cv *cap) { - cv::Mat *src = new cv::Mat(); + cv::Mat *src = NULL; static int once = 1; if (once) { once = 0; do { + if (src) delete src; src = get_capture_frame_cv(cap); if (!src) return make_empty_image(0, 0, 0); } while (src->cols < 1 || src->rows < 1 || src->channels() < 1); @@ -719,6 +720,7 @@ image get_image_from_stream_cpp(cap_cv *cap) if (!src) return 
make_empty_image(0, 0, 0); image im = mat_to_image(*src); rgbgr_image(im); + if (src) delete src; return im; } // ---------------------------------------- diff --git a/src/network.c b/src/network.c index c2249a54df6..15c9a9bca80 100644 --- a/src/network.c +++ b/src/network.c @@ -809,6 +809,7 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names, const float thresh = 0.005; // function get_network_boxes() has already filtred dets by actual threshold char *send_buf = (char *)calloc(1024, sizeof(char)); + if (!send_buf) return 0; if (filename) { sprintf(send_buf, "{\n \"frame_id\":%lld, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); } @@ -826,6 +827,7 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names, if (class_id != -1) strcat(send_buf, ", \n"); class_id = j; char *buf = (char *)calloc(2048, sizeof(char)); + if (!buf) return 0; //sprintf(buf, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f}", // image_id, j, dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h, dets[i].prob[j]); @@ -836,7 +838,10 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names, int buf_len = strlen(buf); int total_len = send_buf_len + buf_len + 100; send_buf = (char *)realloc(send_buf, total_len * sizeof(char)); - if (!send_buf) return 0;// exit(-1); + if (!send_buf) { + if (buf) free(buf); + return 0;// exit(-1); + } strcat(send_buf, buf); free(buf); } diff --git a/src/utils.c b/src/utils.c index bee427ed743..f18769ce76b 100644 --- a/src/utils.c +++ b/src/utils.c @@ -41,6 +41,7 @@ int *read_map(char *filename) map = (int*)realloc(map, n * sizeof(int)); map[n-1] = atoi(str); } + if (file) fclose(file); return map; } @@ -65,6 +66,7 @@ void shuffle(void *arr, size_t n, size_t size) memcpy((char*)arr+(j*size), (char*)arr+(i*size), size); memcpy((char*)arr+(i*size), swp, size); } + free(swp); } void del_arg(int argc, char **argv, int index) @@ -685,9 +687,9 @@ int max_index(float *a, int n) int top_max_index(float *a, int n, int k) { + if (n <= 0) return -1; float *values = (float*)calloc(k, sizeof(float)); int *indexes = (int*)calloc(k, sizeof(int)); - if (n <= 0) return -1; int i, j; for (i = 0; i < n; ++i) { for (j = 0; j < k; ++j) { diff --git a/src/yolo.c b/src/yolo.c index 711470eade2..339d49cd5ce 100644 --- a/src/yolo.c +++ b/src/yolo.c @@ -189,6 +189,14 @@ void validate_yolo(char *cfgfile, char *weightfile) free_image(val_resized[t]); } } + + if (fps) free(fps); + if (val) free(val); + if (val_resized) free(val_resized); + if (buf) free(buf); + if (buf_resized) free(buf_resized); + if (thr) free(thr); + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); } From 71e835458904f782a905a06d28b4558d9e9830b4 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 15 Nov 2019 23:33:16 +0300 Subject: [PATCH 68/86] Fixed source and destination overlap in sprintf() (i.e. in strcpy() inside) --- src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index f18769ce76b..4651cc0a1e8 100644 --- a/src/utils.c +++ b/src/utils.c @@ -218,7 +218,7 @@ void find_replace_extension(char *str, char *orig, char *rep, char *output) int offset = (p - buffer); int chars_from_end = strlen(buffer) - offset; if (!p || chars_from_end != strlen(orig)) { // Is 'orig' even in 'str' AND is 'orig' found at the end of 'str'? 
- sprintf(output, "%s", str); + sprintf(output, "%s", buffer); free(buffer); return; } From 0cf4c16c9fca93502333fef383e1287b38ca38f8 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 16 Nov 2019 16:50:01 +0300 Subject: [PATCH 69/86] Added GIoU to [Gaussian_yolo]. Added iou_thresh=0.213 to [Gaussian_yolo]. --- src/gaussian_yolo_layer.c | 188 +++++++++++++++++++++++++++++++++----- src/parser.c | 18 +++- src/yolo_layer.c | 117 ++++++++++-------------- 3 files changed, 225 insertions(+), 98 deletions(-) diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 3b58cc5a404..b834c0b13ce 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -81,7 +81,7 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m */ #endif - fprintf(stderr, "Gaussian_yolo\n"); + //fprintf(stderr, "Gaussian_yolo\n"); srand(time(0)); return l; @@ -140,32 +140,70 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int return b; } -float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) +float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate) { box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); - float iou = box_iou(pred, truth); - float tx = (truth.x*lw - i); - float ty = (truth.y*lh - j); - float tw = log(truth.w*w / biases[2*n]); - float th = log(truth.h*h / biases[2*n + 1]); + float iou; + ious all_ious = { 0 }; + all_ious.iou = box_iou(pred, truth); + all_ious.giou = box_giou(pred, truth); + if (pred.w == 0) { pred.w = 1.0; } + if (pred.h == 0) { pred.h = 1.0; } float sigma_const = 0.3; float epsi = pow(10,-9); - float in_exp_x = (tx - x[index + 0*stride])/x[index+1*stride]; + float dx, dy, dw, dh; + + if (iou_loss == MSE) { + iou = all_ious.iou; + + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / biases[2 * n]); + float th = log(truth.h*h / biases[2 * n + 1]); + + dx = (tx - x[index + 0 * stride]); + dy = (ty - x[index + 2 * stride]); + dw = (tw - x[index + 4 * stride]); + dh = (th - x[index + 6 * stride]); + } + else + { + iou = all_ious.giou; + + // https://github.com/generalized-iou/g-darknet + // https://arxiv.org/abs/1902.09630v2 + // https://giou.stanford.edu/ + all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); + + // jacobian^t (transpose) + dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); + dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); + dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); + dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); + + // normalize iou weight + dx *= iou_normalizer; + dy *= iou_normalizer; + dw *= iou_normalizer; + dh *= iou_normalizer; + } + + float in_exp_x = dx / x[index+1*stride]; float in_exp_x_2 = pow(in_exp_x, 2); float normal_dist_x = exp(in_exp_x_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+1*stride]+sigma_const)); - float in_exp_y = (ty - x[index + 2*stride])/x[index+3*stride]; + float in_exp_y = dy / x[index+3*stride]; float in_exp_y_2 = pow(in_exp_y, 2); float normal_dist_y = exp(in_exp_y_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+3*stride]+sigma_const)); - float in_exp_w = (tw - x[index + 4*stride])/x[index+5*stride]; + float in_exp_w = dw / x[index+5*stride]; float in_exp_w_2 = pow(in_exp_w, 2); float normal_dist_w 
= exp(in_exp_w_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+5*stride]+sigma_const)); - float in_exp_h = (th - x[index + 6*stride])/x[index+7*stride]; + float in_exp_h = dh / x[index+7*stride]; float in_exp_h_2 = pow(in_exp_h, 2); float normal_dist_h = exp(in_exp_h_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+7*stride]+sigma_const)); @@ -174,15 +212,26 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind float temp_w = (1./2.) * 1./(normal_dist_w+epsi) * normal_dist_w * scale; float temp_h = (1./2.) * 1./(normal_dist_h+epsi) * normal_dist_h * scale; - delta[index + 0*stride] = temp_x * in_exp_x * (1./x[index+1*stride]); - delta[index + 2*stride] = temp_y * in_exp_y * (1./x[index+3*stride]); - delta[index + 4*stride] = temp_w * in_exp_w * (1./x[index+5*stride]); - delta[index + 6*stride] = temp_h * in_exp_h * (1./x[index+7*stride]); + if (!accumulate) { + delta[index + 0 * stride] = 0; + delta[index + 1 * stride] = 0; + delta[index + 2 * stride] = 0; + delta[index + 3 * stride] = 0; + delta[index + 4 * stride] = 0; + delta[index + 5 * stride] = 0; + delta[index + 6 * stride] = 0; + delta[index + 7 * stride] = 0; + } - delta[index + 1*stride] = temp_x * (in_exp_x_2/x[index+1*stride] - 1./(x[index+1*stride]+sigma_const)); - delta[index + 3*stride] = temp_y * (in_exp_y_2/x[index+3*stride] - 1./(x[index+3*stride]+sigma_const)); - delta[index + 5*stride] = temp_w * (in_exp_w_2/x[index+5*stride] - 1./(x[index+5*stride]+sigma_const)); - delta[index + 7*stride] = temp_h * (in_exp_h_2/x[index+7*stride] - 1./(x[index+7*stride]+sigma_const)); + delta[index + 0*stride] += temp_x * in_exp_x * (1./x[index+1*stride]); + delta[index + 2*stride] += temp_y * in_exp_y * (1./x[index+3*stride]); + delta[index + 4*stride] += temp_w * in_exp_w * (1./x[index+5*stride]); + delta[index + 6*stride] += temp_h * in_exp_h * (1./x[index+7*stride]); + + delta[index + 1*stride] += temp_x * (in_exp_x_2/x[index+1*stride] - 1./(x[index+1*stride]+sigma_const)); + delta[index + 3*stride] += temp_y * (in_exp_y_2/x[index+3*stride] - 1./(x[index+3*stride]+sigma_const)); + delta[index + 5*stride] += temp_w * (in_exp_w_2/x[index+5*stride] - 1./(x[index+5*stride]+sigma_const)); + delta[index + 7*stride] += temp_h * (in_exp_h_2/x[index+7*stride] - 1./(x[index+7*stride]+sigma_const)); return iou; } @@ -201,6 +250,18 @@ void delta_gaussian_yolo_class(float *output, float *delta, int index, int class } } +int compare_gaussian_yolo_class(float *output, int classes, int class_index, int stride, float objectness, int class_id, float conf_thresh) +{ + int j; + for (j = 0; j < classes; ++j) { + float prob = objectness * output[class_index + stride*j]; + if (prob > conf_thresh) { + return 1; + } + } + return 0; +} + static int entry_gaussian_index(layer l, int batch, int location, int entry) { int n = location / (l.w*l.h); @@ -254,12 +315,31 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) for (n = 0; n < l.n; ++n) { int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); box pred = get_gaussian_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h); + float best_match_iou = 0; + int best_match_t = 0; float best_iou = 0; int best_t = 0; for(t = 0; t < l.max_boxes; ++t){ box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (class_id >= l.classes) { + printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. 
In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); + printf(" truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id); + getchar(); + continue; // if label contains class_id more than number of classes in the cfg-file + } if(!truth.x) break; + + int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); + int obj_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 8); + float objectness = l.output[obj_index]; + int class_id_match = compare_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness, class_id, 0.25f); + float iou = box_iou(pred, truth); + if (iou > best_match_iou && class_id_match == 1) { + best_match_iou = iou; + best_match_t = t; + } if (iou > best_iou) { best_iou = iou; best_t = t; @@ -267,19 +347,19 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) } int obj_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 8); avg_anyobj += l.output[obj_index]; - l.delta[obj_index] = 0 - l.output[obj_index]; - if (best_iou > l.ignore_thresh) { + l.delta[obj_index] = l.cls_normalizer * (0 - l.output[obj_index]); + if (best_match_iou > l.ignore_thresh) { l.delta[obj_index] = 0; } if (best_iou > l.truth_thresh) { - l.delta[obj_index] = 1 - l.output[obj_index]; + l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); int class_id = state.truth[best_t*(4 + 1) + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); } } } @@ -308,11 +388,11 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if(mask_n >= 0){ int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; - l.delta[obj_index] = 1 - l.output[obj_index]; + l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; if (l.map) class_id = l.map[class_id]; @@ -325,6 +405,64 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if(iou > .75) recall75 += 1; avg_iou += iou; } + + + // iou_thresh + for (n = 0; n < l.total; ++n) { + int mask_n = int_index(l.mask, n, l.n); + if (mask_n >= 0 && n != best_n && l.iou_thresh < 1.0f) { + box pred = { 0 }; + pred.w = l.biases[2 * n] / state.net.w; + pred.h = l.biases[2 * n + 1] / state.net.h; + float iou = box_iou(pred, truth_shift); + // iou, n + + if (iou > l.iou_thresh) { + int box_index = entry_gaussian_index(l, b, 
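// iou_thresh multi-anchor matching: in addition to best_n, any other anchor
// in this layer's mask whose plain w/h IoU with the truth exceeds iou_thresh
// (0.213 per this commit's message; the default of 1 disables the block via
// the l.iou_thresh < 1.0f guard) also receives box, objectness and class
// deltas for the same ground truth.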
mask_n*l.w*l.h + j*l.w + i, 0); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); + + int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); + avg_obj += l.output[obj_index]; + l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); + + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; + int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); + delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat); + + ++count; + ++class_count; + if (iou > .5) recall += 1; + if (iou > .75) recall75 += 1; + avg_iou += iou; + } + } + } + } + + // averages the deltas obtained by the function: delta_yolo_box()_accumulate + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); + int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); + const int stride = l.w*l.h; + + int classes_in_one_box = 0; + for (n = 0; n < l.classes; ++n) { + if (l.delta[class_index + stride*n] > 0) classes_in_one_box++; + } + + l.delta[box_index + 0 * stride] /= classes_in_one_box; + l.delta[box_index + 1 * stride] /= classes_in_one_box; + l.delta[box_index + 2 * stride] /= classes_in_one_box; + l.delta[box_index + 3 * stride] /= classes_in_one_box; + l.delta[box_index + 4 * stride] /= classes_in_one_box; + l.delta[box_index + 5 * stride] /= classes_in_one_box; + l.delta[box_index + 6 * stride] /= classes_in_one_box; + l.delta[box_index + 7 * stride] /= classes_in_one_box; + } + } } } *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); diff --git a/src/parser.c b/src/parser.c index 472b179d20d..f0533f36e12 100644 --- a/src/parser.c +++ b/src/parser.c @@ -436,14 +436,28 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 char *a = option_find_str(options, "mask", 0); int *mask = parse_gaussian_yolo_mask(a, &num); layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); - assert(l.outputs == params.inputs); + if (l.outputs != params.inputs) { + printf("Error: l.outputs == params.inputs \n"); + printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [Gaussian_yolo]-layer \n"); + exit(EXIT_FAILURE); + } + //assert(l.outputs == params.inputs); l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); - l.max_boxes = option_find_int_quiet(options, "max", 90); + l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75); + l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1); + char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse"); // "iou"); + + if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE; + else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU; + else l.iou_loss = IOU; + fprintf(stderr, "[Gaussian_yolo] iou loss: %s, iou_norm: %2.2f, cls_norm: %2.2f, scale: %2.2f\n", (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? 
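/* These keys come straight from the [Gaussian_yolo] section of the cfg; the
 * Gaussian_yolov3_BDD.cfg in this series sets, for example:
 *   iou_loss=giou  iou_normalizer=0.5  cls_normalizer=1.0  iou_thresh=0.213
 * Any iou_loss string other than "mse" or "giou" falls back to plain IOU. */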
"giou" : "iou")), l.iou_normalizer, l.cls_normalizer, l.scale_x_y); + l.jitter = option_find_float(options, "jitter", .2); l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); l.truth_thresh = option_find_float(options, "truth_thresh", 1); + l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo] l.random = option_find_int_quiet(options, "random", 0); char *map_file = option_find_str(options, "map", 0); diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 6c02310617a..b663b73da74 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -128,72 +128,7 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw return b; } - -int compare_yolo_class(float *output, int classes, int class_index, int stride, float objectness, int class_id) -{ - const float conf_thresh = 0.25; - - int j; - for (j = 0; j < classes; ++j) { - float prob = objectness * output[class_index + stride*j]; - if (prob > conf_thresh) { - return 1; - } - } - return 0; -} - -ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss) -{ - ious all_ious = { 0 }; - // i - step in layer width - // j - step in layer height - // Returns a box in absolute coordinates - box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); - all_ious.iou = box_iou(pred, truth); - all_ious.giou = box_giou(pred, truth); - // avoid nan in dx_box_iou - if (pred.w == 0) { pred.w = 1.0; } - if (pred.h == 0) { pred.h = 1.0; } - if (iou_loss == MSE) // old loss - { - float tx = (truth.x*lw - i); - float ty = (truth.y*lh - j); - float tw = log(truth.w*w / biases[2 * n]); - float th = log(truth.h*h / biases[2 * n + 1]); - - delta[index + 0 * stride] = scale * (tx - x[index + 0 * stride]); - delta[index + 1 * stride] = scale * (ty - x[index + 1 * stride]); - delta[index + 2 * stride] = scale * (tw - x[index + 2 * stride]); - delta[index + 3 * stride] = scale * (th - x[index + 3 * stride]); - } - else { - // https://github.com/generalized-iou/g-darknet - // https://arxiv.org/abs/1902.09630v2 - // https://giou.stanford.edu/ - all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); - - // jacobian^t (transpose) - delta[index + 0 * stride] = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); - delta[index + 1 * stride] = (all_ious.dx_iou.dt + all_ious.dx_iou.db); - delta[index + 2 * stride] = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); - delta[index + 3 * stride] = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); - - // predict exponential, apply gradient of e^delta_t ONLY for w,h - delta[index + 2 * stride] *= exp(x[index + 2 * stride]); - delta[index + 3 * stride] *= exp(x[index + 3 * stride]); - - // normalize iou weight - delta[index + 0 * stride] *= iou_normalizer; - delta[index + 1 * stride] *= iou_normalizer; - delta[index + 2 * stride] *= iou_normalizer; - delta[index + 3 * stride] *= iou_normalizer; - } - - return all_ious; -} - -ious delta_yolo_box_accumulate(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss) +ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate) { ious all_ious = { 0 }; // i - step in 
layer width @@ -240,6 +175,13 @@ ious delta_yolo_box_accumulate(box truth, float *x, float *biases, int n, int in dw *= iou_normalizer; dh *= iou_normalizer; + if (!accumulate) { + delta[index + 0 * stride] = 0; + delta[index + 1 * stride] = 0; + delta[index + 2 * stride] = 0; + delta[index + 3 * stride] = 0; + } + // accumulate delta delta[index + 0 * stride] += dx; delta[index + 1 * stride] += dy; @@ -287,6 +229,18 @@ void delta_yolo_class(float *output, float *delta, int index, int class_id, int } } +int compare_yolo_class(float *output, int classes, int class_index, int stride, float objectness, int class_id, float conf_thresh) +{ + int j; + for (j = 0; j < classes; ++j) { + float prob = objectness * output[class_index + stride*j]; + if (prob > conf_thresh) { + return 1; + } + } + return 0; +} + static int entry_index(layer l, int batch, int location, int entry) { int n = location / (l.w*l.h); @@ -351,7 +305,7 @@ void forward_yolo_layer(const layer l, network_state state) int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); float objectness = l.output[obj_index]; - int class_id_match = compare_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness, class_id); + int class_id_match = compare_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness, class_id, 0.25f); float iou = box_iou(pred, truth); if (iou > best_match_iou && class_id_match == 1) { @@ -377,7 +331,7 @@ void forward_yolo_layer(const layer l, network_state state) int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_yolo_box_accumulate(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); } } } @@ -411,7 +365,7 @@ void forward_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if (mask_n >= 0) { int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - ious all_ious = delta_yolo_box_accumulate(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss); + ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); // range is 0 <= 1 tot_iou += all_ious.iou; @@ -438,7 +392,7 @@ void forward_yolo_layer(const layer l, network_state state) // iou_thresh for (n = 0; n < l.total; ++n) { int mask_n = int_index(l.mask, n, l.n); - if (mask_n >= 0 && n != best_n) { + if (mask_n >= 0 && n != best_n && l.iou_thresh < 1.0f) { box pred = { 0 }; pred.w = l.biases[2 * n] / state.net.w; pred.h = l.biases[2 * n + 1] / state.net.h; @@ -447,7 +401,7 @@ void forward_yolo_layer(const layer l, network_state state) if (iou > l.iou_thresh) { int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - ious all_ious = delta_yolo_box_accumulate(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, 
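// This series folds delta_yolo_box_accumulate() into delta_yolo_box() via a
// trailing 'accumulate' flag; every forward-pass call site now passes 1, so
// deltas from multiple matched truths sum up and are rescaled afterwards by
// the per-box averaging loop at the end of the layer.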
l.iou_loss); + ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); // range is 0 <= 1 tot_iou += all_ious.iou; @@ -473,6 +427,27 @@ void forward_yolo_layer(const layer l, network_state state) } } } + + // averages the deltas obtained by the function: delta_yolo_box()_accumulate + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); + int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); + const int stride = l.w*l.h; + + int classes_in_one_box = 0; + for (n = 0; n < l.classes; ++n) { + if (l.delta[class_index + stride*n] > 0) classes_in_one_box++; + } + + l.delta[box_index + 0 * stride] /= classes_in_one_box; + l.delta[box_index + 1 * stride] /= classes_in_one_box; + l.delta[box_index + 2 * stride] /= classes_in_one_box; + l.delta[box_index + 3 * stride] /= classes_in_one_box; + } + } + } } //*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); //printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou / count, avg_cat / class_count, avg_obj / count, avg_anyobj / (l.w*l.h*l.n*l.batch), recall / count, recall75 / count, count); From 0d30db35fc2d3d8edf52ef2870c7f18b1a6eabe3 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 16 Nov 2019 17:14:50 +0300 Subject: [PATCH 70/86] gaussian_yolo: added uc_normalizer and minor fix for iou_normalizer for GIoU. --- src/gaussian_yolo_layer.c | 59 ++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index b834c0b13ce..7200fecd61b 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -140,7 +140,7 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int return b; } -float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate) +float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate) { box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); @@ -157,6 +157,7 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind float dx, dy, dw, dh; if (iou_loss == MSE) { + // MSE iou = all_ious.iou; float tx = (truth.x*lw - i); @@ -171,6 +172,7 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind } else { + // GIoU iou = all_ious.giou; // https://github.com/generalized-iou/g-darknet @@ -183,14 +185,9 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); - - // normalize iou weight - dx *= iou_normalizer; - dy *= iou_normalizer; - dw *= iou_normalizer; - dh *= iou_normalizer; } + // Gaussian float in_exp_x = dx / x[index+1*stride]; float in_exp_x_2 = pow(in_exp_x, 2); float normal_dist_x = exp(in_exp_x_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+1*stride]+sigma_const)); @@ -223,15 +220,39 
@@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind delta[index + 7 * stride] = 0; } - delta[index + 0*stride] += temp_x * in_exp_x * (1./x[index+1*stride]); - delta[index + 2*stride] += temp_y * in_exp_y * (1./x[index+3*stride]); - delta[index + 4*stride] += temp_w * in_exp_w * (1./x[index+5*stride]); - delta[index + 6*stride] += temp_h * in_exp_h * (1./x[index+7*stride]); - - delta[index + 1*stride] += temp_x * (in_exp_x_2/x[index+1*stride] - 1./(x[index+1*stride]+sigma_const)); - delta[index + 3*stride] += temp_y * (in_exp_y_2/x[index+3*stride] - 1./(x[index+3*stride]+sigma_const)); - delta[index + 5*stride] += temp_w * (in_exp_w_2/x[index+5*stride] - 1./(x[index+5*stride]+sigma_const)); - delta[index + 7*stride] += temp_h * (in_exp_h_2/x[index+7*stride] - 1./(x[index+7*stride]+sigma_const)); + float delta_x = temp_x * in_exp_x * (1. / x[index + 1 * stride]); + float delta_y = temp_y * in_exp_y * (1. / x[index + 3 * stride]); + float delta_w = temp_w * in_exp_w * (1. / x[index + 5 * stride]); + float delta_h = temp_h * in_exp_h * (1. / x[index + 7 * stride]); + + float delta_ux = temp_x * (in_exp_x_2 / x[index + 1 * stride] - 1. / (x[index + 1 * stride] + sigma_const)); + float delta_uy = temp_y * (in_exp_y_2 / x[index + 3 * stride] - 1. / (x[index + 3 * stride] + sigma_const)); + float delta_uw = temp_w * (in_exp_w_2 / x[index + 5 * stride] - 1. / (x[index + 5 * stride] + sigma_const)); + float delta_uh = temp_h * (in_exp_h_2 / x[index + 7 * stride] - 1. / (x[index + 7 * stride] + sigma_const)); + + if (iou_loss != MSE) { + // normalize iou weight, for GIoU + delta_x *= iou_normalizer; + delta_y *= iou_normalizer; + delta_w *= iou_normalizer; + delta_h *= iou_normalizer; + } + // normalize Uncertainty weight + delta_ux *= uc_normalizer; + delta_uy *= uc_normalizer; + delta_uw *= uc_normalizer; + delta_uh *= uc_normalizer; + + + delta[index + 0 * stride] += delta_x; + delta[index + 2 * stride] += delta_y; + delta[index + 4 * stride] += delta_w; + delta[index + 6 * stride] += delta_h; + + delta[index + 1 * stride] += delta_ux; + delta[index + 3 * stride] += delta_uy; + delta[index + 5 * stride] += delta_uw; + delta[index + 7 * stride] += delta_uh; return iou; } @@ -359,7 +380,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); + delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1); } } } @@ -388,7 +409,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if(mask_n >= 0){ int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), 
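// uc_normalizer scales only the four sigma (uncertainty) deltas, while
// iou_normalizer is now applied only to the mu deltas and only when
// iou_loss != MSE; the trailing literal 1 is the accumulate flag.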
l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; @@ -419,7 +440,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if (iou > l.iou_thresh) { int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; From 920de66bfa85a9e694d78b6138114c723c854d7c Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 16 Nov 2019 17:23:21 +0300 Subject: [PATCH 71/86] gaussian_yolo: added uc_normalizer and minor fix for iou_normalizer for GIoU. --- src/parser.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index f0533f36e12..3728c4422f9 100644 --- a/src/parser.c +++ b/src/parser.c @@ -444,8 +444,9 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 //assert(l.outputs == params.inputs); l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); + l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0); l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75); - l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1); + l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1.0); char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse"); // "iou"); if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE; From 6e5bdf1282ad6b06ed0e962c3f5be67cf63d96dc Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 16 Nov 2019 18:16:37 +0300 Subject: [PATCH 72/86] Bug fix --- build/darknet/x64/cfg/Gaussian_yolov3_BDD.cfg | 807 ++++++++++++++++++ cfg/Gaussian_yolov3_BDD.cfg | 807 ++++++++++++++++++ include/darknet.h | 1 + src/gaussian_yolo_layer.c | 94 +- src/yolo_layer.c | 28 +- 5 files changed, 1715 insertions(+), 22 deletions(-) create mode 100644 build/darknet/x64/cfg/Gaussian_yolov3_BDD.cfg create mode 100644 cfg/Gaussian_yolov3_BDD.cfg diff --git a/build/darknet/x64/cfg/Gaussian_yolov3_BDD.cfg b/build/darknet/x64/cfg/Gaussian_yolov3_BDD.cfg new file mode 100644 index 00000000000..2ca7ec600e3 --- /dev/null +++ b/build/darknet/x64/cfg/Gaussian_yolov3_BDD.cfg @@ -0,0 +1,807 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=512 +height=512 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 +max_epochs = 300 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 6,7,8 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 
+pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 3,4,5 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 0,1,2 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 diff --git a/cfg/Gaussian_yolov3_BDD.cfg b/cfg/Gaussian_yolov3_BDD.cfg new file mode 100644 index 00000000000..2ca7ec600e3 --- /dev/null +++ b/cfg/Gaussian_yolov3_BDD.cfg @@ -0,0 +1,807 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=512 +height=512 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 +max_epochs = 300 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
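# The three [Gaussian_yolo] heads later in this file differ only in mask=
# (which anchors each scale owns); their loss settings are identical:
#   iou_loss=giou       box regression by GIoU instead of MSE
#   iou_normalizer=0.5  weight on the GIoU coordinate deltas
#   uc_normalizer=1.0   weight on the uncertainty (sigma) deltas
#   iou_thresh=0.213    extra anchors above this IoU also match the truth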
+activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 6,7,8 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 3,4,5 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 0,1,2 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 diff --git a/include/darknet.h b/include/darknet.h index 2bd70d1cde3..7a906780c61 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -330,6 +330,7 @@ struct layer { float *weight_updates; float scale_x_y; + float uc_normalizer; float iou_normalizer; float cls_normalizer; IOU_LOSS iou_loss; diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 7200fecd61b..16971437872 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -256,6 +256,26 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind return iou; } +void averages_gaussian_yolo_deltas(int class_index, int box_index, int stride, int classes, float *delta) +{ + + int classes_in_one_box = 0; + int c; + for (c = 0; c < classes; ++c) { + if (delta[class_index + stride*c] > 0) classes_in_one_box++; + } + + if (classes_in_one_box > 0) { + delta[box_index + 0 * stride] /= classes_in_one_box; + delta[box_index + 1 * stride] /= classes_in_one_box; + delta[box_index + 2 * stride] /= classes_in_one_box; + delta[box_index + 3 * stride] /= classes_in_one_box; + delta[box_index + 4 * stride] /= classes_in_one_box; + delta[box_index + 5 * stride] /= classes_in_one_box; + delta[box_index + 6 * stride] /= classes_in_one_box; + delta[box_index + 7 * stride] /= classes_in_one_box; + } +} void delta_gaussian_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat) { @@ -469,25 +489,73 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); const int stride = l.w*l.h; - int classes_in_one_box = 0; - for (n = 0; n < l.classes; ++n) { - if (l.delta[class_index + stride*n] > 0) classes_in_one_box++; - } + averages_gaussian_yolo_deltas(class_index, box_index, stride, l.classes, l.delta); + } + } + } + } + + + // calculate: Classification-loss, IoU-loss and Uncertainty-loss + const int stride = l.w*l.h; + float* classification_lost = (float *)calloc(l.batch * l.outputs, sizeof(float)); + memcpy(classification_lost, l.delta, l.batch * l.outputs * sizeof(float)); + - l.delta[box_index + 0 * stride] /= classes_in_one_box; - l.delta[box_index + 1 * stride] /= classes_in_one_box; - l.delta[box_index + 2 * stride] /= classes_in_one_box; - l.delta[box_index + 3 * stride] /= classes_in_one_box; - l.delta[box_index + 4 * stride] /= classes_in_one_box; - l.delta[box_index + 5 * stride] /= classes_in_one_box; - l.delta[box_index + 6 * stride] /= classes_in_one_box; - l.delta[box_index + 7 * stride] /= classes_in_one_box; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); + + classification_lost[box_index + 0 * stride] = 0; + classification_lost[box_index + 1 * stride] = 0; + classification_lost[box_index + 2 * stride] = 0; + classification_lost[box_index + 3 * stride] = 0; + 
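// Loss decomposition for the printf below: this copy zeroes all eight box
// entries, so class_loss measures only the class + objectness deltas. The
// second copy (except_uncertainty_lost) zeroes offsets 4..7; note the sigma
// entries actually sit at odd offsets 1,3,5,7, so that copy appears to drop
// the w/h terms rather than the uncertainties -- a possible off-by-one.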
classification_lost[box_index + 4 * stride] = 0; + classification_lost[box_index + 5 * stride] = 0; + classification_lost[box_index + 6 * stride] = 0; + classification_lost[box_index + 7 * stride] = 0; } } } } + float class_loss = pow(mag_array(classification_lost, l.outputs * l.batch), 2); + free(classification_lost); + + + float* except_uncertainty_lost = (float *)calloc(l.batch * l.outputs, sizeof(float)); + memcpy(except_uncertainty_lost, l.delta, l.batch * l.outputs * sizeof(float)); + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); + except_uncertainty_lost[box_index + 4 * stride] = 0; + except_uncertainty_lost[box_index + 5 * stride] = 0; + except_uncertainty_lost[box_index + 6 * stride] = 0; + except_uncertainty_lost[box_index + 7 * stride] = 0; + } + } + } + } + float except_uc_loss = pow(mag_array(except_uncertainty_lost, l.outputs * l.batch), 2); + free(except_uncertainty_lost); + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); - printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); + + float loss = pow(mag_array(l.delta, l.outputs * l.batch), 2); + float uc_loss = loss - except_uc_loss; + float iou_loss = except_uc_loss - class_loss; + + loss /= l.batch; + class_loss /= l.batch; + uc_loss /= l.batch; + iou_loss /= l.batch; + + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d, loss = %.2f, class_loss = %.2f, iou_loss = %.2f, uc_loss = %.2f \n", + state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count, + loss, class_loss, iou_loss, uc_loss); } void backward_gaussian_yolo_layer(const layer l, network_state state) diff --git a/src/yolo_layer.c b/src/yolo_layer.c index b663b73da74..34185937459 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -192,6 +192,23 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, return all_ious; } +void averages_yolo_deltas(int class_index, int box_index, int stride, int classes, float *delta) +{ + + int classes_in_one_box = 0; + int c; + for (c = 0; c < classes; ++c) { + if (delta[class_index + stride*c] > 0) classes_in_one_box++; + } + + if (classes_in_one_box > 0) { + delta[box_index + 0 * stride] /= classes_in_one_box; + delta[box_index + 1 * stride] /= classes_in_one_box; + delta[box_index + 2 * stride] /= classes_in_one_box; + delta[box_index + 3 * stride] /= classes_in_one_box; + } +} + void delta_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat, int focal_loss) { int n; @@ -436,19 +453,12 @@ void forward_yolo_layer(const layer l, network_state state) int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); const int stride = l.w*l.h; - int classes_in_one_box = 0; - for (n = 0; n < l.classes; ++n) { - if (l.delta[class_index + stride*n] > 0) classes_in_one_box++; - } - - l.delta[box_index + 0 * stride] /= classes_in_one_box; - l.delta[box_index + 1 * stride] /= classes_in_one_box; - l.delta[box_index + 2 * stride] /= classes_in_one_box; - l.delta[box_index + 3 * stride] /= classes_in_one_box; + averages_yolo_deltas(class_index, box_index, stride, l.classes, l.delta); } } } } + //*(l.cost) = 
pow(mag_array(l.delta, l.outputs * l.batch), 2); //printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou / count, avg_cat / class_count, avg_obj / count, avg_anyobj / (l.w*l.h*l.n*l.batch), recall / count, recall75 / count, count); From 5d0aa6ec522afd5d859c068f40eded77d6704fa0 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sun, 17 Nov 2019 01:18:28 +0300 Subject: [PATCH 73/86] Compile fix --- src/gaussian_yolo_layer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 16971437872..93569cf0eac 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -374,7 +374,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); int obj_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 8); float objectness = l.output[obj_index]; - int class_id_match = compare_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness, class_id, 0.25f); + int class_id_match = compare_gaussian_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness, class_id, 0.25f); float iou = box_iou(pred, truth); if (iou > best_match_iou && class_id_match == 1) { From 77e60aecfe7f92de24fb1b96b5554fb16bc0ac06 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sun, 17 Nov 2019 20:42:21 +0300 Subject: [PATCH 74/86] Minor fix --- src/box.c | 1 + src/detector.c | 1 + src/gaussian_yolo_layer.c | 70 +++++++++++++++++++++------------------ 3 files changed, 39 insertions(+), 33 deletions(-) diff --git a/src/box.c b/src/box.c index c6a27ed587f..22f85884009 100644 --- a/src/box.c +++ b/src/box.c @@ -425,6 +425,7 @@ int nms_comparator_v3(const void *pa, const void *pb) float diff = 0; if (b.sort_class >= 0) { diff = a.prob[b.sort_class] - b.prob[b.sort_class]; + //diff = a.objectness*a.prob[b.sort_class] - b.objectness*b.prob[b.sort_class]; } else { diff = a.objectness - b.objectness; diff --git a/src/detector.c b/src/detector.c index 7f32c4fa7a6..8177343ef3d 100644 --- a/src/detector.c +++ b/src/detector.c @@ -801,6 +801,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa } //detection *dets = get_network_boxes(&net, val[t].w, val[t].h, thresh, hier_thresh, 0, 1, &nboxes, letter_box); // for letter_box=1 if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + //if (nms) do_nms_obj(dets, nboxes, l.classes, nms); char labelpath[4096]; replace_image_to_label(path, labelpath); diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 93569cf0eac..236e28e7d4e 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -156,36 +156,17 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind float dx, dy, dw, dh; - if (iou_loss == MSE) { - // MSE - iou = all_ious.iou; - - float tx = (truth.x*lw - i); - float ty = (truth.y*lh - j); - float tw = log(truth.w*w / biases[2 * n]); - float th = log(truth.h*h / biases[2 * n + 1]); - - dx = (tx - x[index + 0 * stride]); - dy = (ty - x[index + 2 * stride]); - dw = (tw - x[index + 4 * stride]); - dh = (th - x[index + 6 * stride]); - } - else - { - // GIoU - iou = all_ious.giou; + iou = all_ious.iou; - // https://github.com/generalized-iou/g-darknet - // https://arxiv.org/abs/1902.09630v2 - // https://giou.stanford.edu/ - all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); + float tx = (truth.x*lw - i); + float ty = (truth.y*lh - j); + float tw = log(truth.w*w / 
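// After this restructuring the MSE-style residuals dx..dh always drive the
// Gaussian density terms, so sigma is trained against the plain residual in
// both modes; when iou_loss != MSE, the GIoU branch below recomputes the mu
// deltas from dx_box_iou() and folds them in.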
biases[2 * n]); + float th = log(truth.h*h / biases[2 * n + 1]); - // jacobian^t (transpose) - dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); - dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); - dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); - dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); - } + dx = (tx - x[index + 0 * stride]); + dy = (ty - x[index + 2 * stride]); + dw = (tw - x[index + 4 * stride]); + dh = (th - x[index + 6 * stride]); // Gaussian float in_exp_x = dx / x[index+1*stride]; @@ -231,19 +212,42 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind float delta_uh = temp_h * (in_exp_h_2 / x[index + 7 * stride] - 1. / (x[index + 7 * stride] + sigma_const)); if (iou_loss != MSE) { + // GIoU + iou = all_ious.giou; + + // https://github.com/generalized-iou/g-darknet + // https://arxiv.org/abs/1902.09630v2 + // https://giou.stanford.edu/ + all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); + + // jacobian^t (transpose) + float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); + float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); + float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); + float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); + + // predict exponential, apply gradient of e^delta_t ONLY for w,h + dw *= exp(x[index + 4 * stride]); + dh *= exp(x[index + 6 * stride]); + // normalize iou weight, for GIoU - delta_x *= iou_normalizer; - delta_y *= iou_normalizer; - delta_w *= iou_normalizer; - delta_h *= iou_normalizer; + dx *= iou_normalizer; + dy *= iou_normalizer; + dw *= iou_normalizer; + dh *= iou_normalizer; + + delta_x = (delta_x + dx) / 2; + delta_y = (delta_y + dy) / 2; + delta_w = (delta_w + dw) / 2; + delta_h = (delta_h + dh) / 2; } + // normalize Uncertainty weight delta_ux *= uc_normalizer; delta_uy *= uc_normalizer; delta_uw *= uc_normalizer; delta_uh *= uc_normalizer; - delta[index + 0 * stride] += delta_x; delta[index + 2 * stride] += delta_y; delta[index + 4 * stride] += delta_w; From 10c40551dcadec68050befa6a1cecc6f69049d0d Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 18 Nov 2019 02:56:17 +0300 Subject: [PATCH 75/86] GIoU + Gaussian fix --- src/box.c | 3 +-- src/convolutional_layer.c | 5 +++++ src/gaussian_yolo_layer.c | 11 ++++++----- src/yolo_layer.c | 3 ++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/box.c b/src/box.c index 22f85884009..cb28ce8e4d7 100644 --- a/src/box.c +++ b/src/box.c @@ -424,8 +424,7 @@ int nms_comparator_v3(const void *pa, const void *pb) detection b = *(detection *)pb; float diff = 0; if (b.sort_class >= 0) { - diff = a.prob[b.sort_class] - b.prob[b.sort_class]; - //diff = a.objectness*a.prob[b.sort_class] - b.objectness*b.prob[b.sort_class]; + diff = a.prob[b.sort_class] - b.prob[b.sort_class]; // there is already: prob = objectness*prob } else { diff = a.objectness - b.objectness; diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index f9d66ebf3b6..39b65a70d4c 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -405,6 +405,11 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, l.nweights = (c / groups) * n * size * size; if (l.share_layer) { + if (l.size != l.share_layer->size || l.nweights != l.share_layer->nweights || l.c != l.share_layer->c || l.n != l.share_layer->n) { + printf("Layer size, nweights, channels or filters don't match for the share_layer"); + getchar(); + } + l.weights = l.share_layer->weights; l.weight_updates = 
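// Sanity check above: weights/weight_updates are aliased to the share_layer's
// buffers rather than copied, so the two layers must agree on size, nweights,
// c and n or the shared pointers would be indexed out of bounds; getchar()
// pauses so the warning is not lost in the training log.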
l.share_layer->weight_updates;
diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 236e28e7d4e..109eb522c3a 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -236,10 +236,10 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind dw *= iou_normalizer; dh *= iou_normalizer; - delta_x = (delta_x + dx) / 2; - delta_y = (delta_y + dy) / 2; - delta_w = (delta_w + dw) / 2; - delta_h = (delta_h + dh) / 2; + delta_x = dx; + delta_y = dy; + delta_w = dw; + delta_h = dh; } // normalize Uncertainty weight @@ -299,7 +299,8 @@ int compare_gaussian_yolo_class(float *output, int classes, int class_index, int { int j; for (j = 0; j < classes; ++j) { - float prob = objectness * output[class_index + stride*j]; + //float prob = objectness * output[class_index + stride*j]; + float prob = output[class_index + stride*j]; if (prob > conf_thresh) { return 1; }
diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 34185937459..08577db5ab4 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -250,7 +250,8 @@ int compare_yolo_class(float *output, int classes, int class_index, int stride, { int j; for (j = 0; j < classes; ++j) { - float prob = objectness * output[class_index + stride*j]; + //float prob = objectness * output[class_index + stride*j]; + float prob = output[class_index + stride*j]; if (prob > conf_thresh) { return 1; }

From b4c0fbaec86e5088848094cdaf58d56f94138a13 Mon Sep 17 00:00:00 2001
From: dccho
Date: Mon, 18 Nov 2019 09:52:22 -0500
Subject: [PATCH 76/86] fix memory free bug

--- src/convolutional_layer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index e784f5e4b89..805da22863d 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -769,8 +769,10 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) l->binary_input_gpu = cuda_make_array(0, l->inputs*l->batch); } - cuda_free(l->activation_input_gpu); - if (l->activation == SWISH || l->activation == MISH) l->activation_input_gpu = cuda_make_array(l->activation_input, total_batch*l->outputs); + if (l->activation == SWISH || l->activation == MISH) { + cuda_free(l->activation_input_gpu); + l->activation_input_gpu = cuda_make_array(l->activation_input, total_batch*l->outputs); + } } #ifdef CUDNN cudnn_convolutional_setup(l, cudnn_fastest);

From 5a77940bd58ac29841a7ba36e18f73d32272c48a Mon Sep 17 00:00:00 2001
From: Mosè Giordano
Date: Thu, 21 Nov 2019 01:53:31 +0000
Subject: [PATCH 77/86] Add $(OBJDIR) as prerequisite to $(LIBNAMESO)

The directory must exist before this rule runs. When running `make` with several parallel jobs, it can happen that the $(LIBNAMESO) rule is run before the directory has been created.
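For illustration, a minimal sketch of the intended ordering (only names this Makefile already defines are used: $(LIBNAMESO), $(OBJDIR), $(OBJS), $(CPP); the real rule additionally compiles src/yolo_v2_class.cpp and passes $(COMMON), $(CFLAGS) and $(LDFLAGS)):

```
# The directory is a normal prerequisite, so it is created before linking:
$(LIBNAMESO): $(OBJDIR) $(OBJS)
	$(CPP) -shared $(OBJS) -o $@

$(OBJDIR):
	mkdir -p $(OBJDIR)
```

A GNU Make order-only prerequisite (`$(LIBNAMESO): $(OBJS) | $(OBJDIR)`) would give the same guarantee while not re-triggering the link when only the directory's timestamp changes.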
--- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 41e5fc8d737..c59991281ba 100644 --- a/Makefile +++ b/Makefile @@ -127,12 +127,12 @@ endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) DEPS = $(wildcard src/*.h) Makefile include/darknet.h -all: obj backup results setchmod $(EXEC) $(LIBNAMESO) $(APPNAMESO) +all: $(OBJDIR) backup results setchmod $(EXEC) $(LIBNAMESO) $(APPNAMESO) ifeq ($(LIBSO), 1) CFLAGS+= -fPIC -$(LIBNAMESO): $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp +$(LIBNAMESO): $(OBJDIR) $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp $(CPP) -shared -std=c++11 -fvisibility=hidden -DLIB_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) src/yolo_v2_class.cpp -o $@ $(LDFLAGS) $(APPNAMESO): $(LIBNAMESO) include/yolo_v2_class.hpp src/yolo_console_dll.cpp @@ -151,8 +151,8 @@ $(OBJDIR)%.o: %.cpp $(DEPS) $(OBJDIR)%.o: %.cu $(DEPS) $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ -obj: - mkdir -p obj +$(OBJDIR): + mkdir -p $(OBJDIR) backup: mkdir -p backup results: From f7a6f7b87cdfd49c9930d2c2a2d7fa5b52b30940 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 21 Nov 2019 14:11:52 +0300 Subject: [PATCH 78/86] Fixed MISH as in thomasbrandon/mish-cuda implementation with 1 Threshold --- src/activation_kernels.cu | 11 ++++++++--- src/activations.c | 9 ++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 67504e71611..6ef165ce768 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -204,9 +204,14 @@ __global__ void activate_array_mish_kernel(float *x, int n, float *activation_in { int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (i < n) { + const float MISH_THRESHOLD = 20; float x_val = x[i]; activation_input[i] = x_val; // store value before activation - output_gpu[i] = x_val * tanh_activate_kernel(log(1 + expf(x_val))); + //output_gpu[i] = x_val * tanh_activate_kernel(log(1 + expf(x_val))); + + // https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L17-L20 + if (x_val < MISH_THRESHOLD) output_gpu[i] = x_val * tanh_activate_kernel(log(expf(x_val))); + else output_gpu[i] = x_val * tanh_activate_kernel(x_val); } } @@ -279,12 +284,12 @@ __global__ void gradient_array_mish_kernel(int n, float *activation_input_gpu, f { int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; if (i < n) { - const float THRESHOLD = 20.0f; + const float MISH_THRESHOLD = 20.0f; // implementation from TensorFlow: https://github.com/tensorflow/addons/commit/093cdfa85d334cbe19a37624c33198f3140109ed // implementation from Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L26-L31 float inp = activation_input_gpu[i]; - const float sp = (inp < THRESHOLD) ? log1p(exp(inp)) : inp; + const float sp = (inp < MISH_THRESHOLD) ? 
log1p(exp(inp)) : inp; const float grad_sp = 1 - exp(-sp); const float tsp = tanh(sp); const float grad_tsp = (1 - tsp*tsp) * grad_sp; diff --git a/src/activations.c b/src/activations.c index 55b060bd94c..83580cb24a5 100644 --- a/src/activations.c +++ b/src/activations.c @@ -137,12 +137,15 @@ void activate_array_swish(float *x, const int n, float * output_sigmoid, float * // https://github.com/digantamisra98/Mish void activate_array_mish(float *x, const int n, float * activation_input, float * output) { + const float MISH_THRESHOLD = 20; int i; #pragma omp parallel for for (i = 0; i < n; ++i) { float x_val = x[i]; activation_input[i] = x_val; // store value before activation - output[i] = x_val * tanh_activate(log(1 + expf(x_val))); + //output[i] = x_val * tanh_activate(log(1 + expf(x_val))); + if (x_val < MISH_THRESHOLD) output[i] = x_val * tanh_activate(log(expf(x_val))); + else output[i] = x_val * tanh_activate(x_val); } } @@ -207,12 +210,12 @@ void gradient_array_mish(const int n, const float * activation_input, float * de int i; #pragma omp parallel for for (i = 0; i < n; ++i) { - const float THRESHOLD = 20.0f; + const float MISH_THRESHOLD = 20.0f; // implementation from TensorFlow: https://github.com/tensorflow/addons/commit/093cdfa85d334cbe19a37624c33198f3140109ed // implementation from Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L26-L31 float inp = activation_input[i]; - const float sp = (inp < THRESHOLD) ? log1p(exp(inp)) : inp; + const float sp = (inp < MISH_THRESHOLD) ? log1p(exp(inp)) : inp; const float grad_sp = 1 - exp(-sp); const float tsp = tanh(sp); const float grad_tsp = (1 - tsp*tsp) * grad_sp; From 7713a0209c0fc5d213db49243ab86306411fec5d Mon Sep 17 00:00:00 2001 From: Alexey Date: Fri, 22 Nov 2019 02:43:33 +0300 Subject: [PATCH 79/86] Update Readme.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5c0efdeff1a..13a13167dd4 100644 --- a/README.md +++ b/README.md @@ -632,6 +632,7 @@ Different tools for marking objects in images: 2. in Python: https://github.com/tzutalin/labelImg 3. in Python: https://github.com/Cartucho/OpenLabeling 4. in C++: https://www.ccoderun.ca/darkmark/ +5. in JavaScript: https://github.com/opencv/cvat ## Using Yolo9000 From b9ca5ec781291f01174d6b496a9c3ebc59303c1f Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 22 Nov 2019 14:20:53 +0300 Subject: [PATCH 80/86] Fixed MISH activation with 2 thresholds in Softplus --- src/activation_kernels.cu | 23 +++++++++++++++-------- src/activations.c | 6 ++---- src/activations.h | 5 +++++ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 6ef165ce768..5b357d3fce6 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -35,6 +35,11 @@ __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} __device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} __device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);} +__device__ float softplus_kernel(float x, float threshold = 20) { + if (x > threshold) return x; // too large + else if (x < -threshold) return expf(x); // too small + return logf(expf(x) + 1); +} __device__ float plse_activate_kernel(float x) { if(x < -4) return .01f * (x + 4); @@ -207,11 +212,12 @@ __global__ void activate_array_mish_kernel(float *x, int n, float *activation_in const float MISH_THRESHOLD = 20; float x_val = x[i]; activation_input[i] = x_val; // store value before activation - //output_gpu[i] = x_val * tanh_activate_kernel(log(1 + expf(x_val))); + //output_gpu[i] = x_val * tanh_activate_kernel(logf(1 + expf(x_val))); - // https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L17-L20 - if (x_val < MISH_THRESHOLD) output_gpu[i] = x_val * tanh_activate_kernel(log(expf(x_val))); - else output_gpu[i] = x_val * tanh_activate_kernel(x_val); + // Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L17-L20 + // TF: https://github.com/tensorflow/addons/blob/093cdfa85d334cbe19a37624c33198f3140109ed/tensorflow_addons/custom_ops/activations/cc/kernels/mish_op.h#L40-L49 + // log1p(x) == log(x + 1) + output_gpu[i] = x_val * tanh_activate_kernel( softplus_kernel(x_val, MISH_THRESHOLD) ); } } @@ -286,11 +292,12 @@ __global__ void gradient_array_mish_kernel(int n, float *activation_input_gpu, f if (i < n) { const float MISH_THRESHOLD = 20.0f; - // implementation from TensorFlow: https://github.com/tensorflow/addons/commit/093cdfa85d334cbe19a37624c33198f3140109ed + // implementation from TensorFlow: https://github.com/tensorflow/addons/blob/093cdfa85d334cbe19a37624c33198f3140109ed/tensorflow_addons/custom_ops/activations/cc/kernels/mish_op.h#L66-L80 // implementation from Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L26-L31 - float inp = activation_input_gpu[i]; - const float sp = (inp < MISH_THRESHOLD) ? log1p(exp(inp)) : inp; - const float grad_sp = 1 - exp(-sp); + // log1p(x) == log(x + 1) + const float inp = activation_input_gpu[i]; + const float sp = softplus_kernel(inp, MISH_THRESHOLD); + const float grad_sp = 1 - expf(-sp); const float tsp = tanh(sp); const float grad_tsp = (1 - tsp*tsp) * grad_sp; const float grad = inp * grad_tsp + tsp; diff --git a/src/activations.c b/src/activations.c index 83580cb24a5..347a13ac47b 100644 --- a/src/activations.c +++ b/src/activations.c @@ -143,9 +143,7 @@ void activate_array_mish(float *x, const int n, float * activation_input, float for (i = 0; i < n; ++i) { float x_val = x[i]; activation_input[i] = x_val; // store value before activation - //output[i] = x_val * tanh_activate(log(1 + expf(x_val))); - if (x_val < MISH_THRESHOLD) output[i] = x_val * tanh_activate(log(expf(x_val))); - else output[i] = x_val * tanh_activate(x_val); + output[i] = x_val * tanh_activate( softplus_activate(x_val, MISH_THRESHOLD) ); } } @@ -215,7 +213,7 @@ void gradient_array_mish(const int n, const float * activation_input, float * de // implementation from TensorFlow: https://github.com/tensorflow/addons/commit/093cdfa85d334cbe19a37624c33198f3140109ed // implementation from Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L26-L31 float inp = activation_input[i]; - const float sp = (inp < MISH_THRESHOLD) ? 
log1p(exp(inp)) : inp; + const float sp = softplus_activate(inp, MISH_THRESHOLD); const float grad_sp = 1 - exp(-sp); const float tsp = tanh(sp); const float grad_tsp = (1 - tsp*tsp) * grad_sp; diff --git a/src/activations.h b/src/activations.h index bba5ca8d10a..edd5b304ff8 100644 --- a/src/activations.h +++ b/src/activations.h @@ -53,6 +53,11 @@ static inline float relie_activate(float x){return (x>0) ? x : .01f*x;} static inline float ramp_activate(float x){return x*(x>0)+.1f*x;} static inline float leaky_activate(float x){return (x>0) ? x : .1f*x;} static inline float tanh_activate(float x){return (expf(2*x)-1)/(expf(2*x)+1);} +static inline float softplus_activate(float x, float threshold) { + if (x > threshold) return x; // too large + else if (x < -threshold) return expf(x); // too small + return logf(expf(x) + 1); +} static inline float plse_activate(float x) { if(x < -4) return .01f * (x + 4); From 4f70fc14e9bd33d24efd64dff16e959e2e391d39 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 23 Nov 2019 16:42:43 +0300 Subject: [PATCH 81/86] Added DIoU and CIoU to [yolo] and [Gaussian_yolo] --- include/darknet.h | 19 +- src/box.c | 443 ++++++++++++++++++++++++++++++++++---- src/box.h | 1 + src/demo.c | 5 +- src/detector.c | 17 +- src/gaussian_yolo_layer.c | 102 +++++++-- src/parser.c | 36 +++- src/utils.c | 19 ++ src/utils.h | 1 + src/yolo_layer.c | 73 +++++++ 10 files changed, 644 insertions(+), 72 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index 7a906780c61..851b65421c8 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -107,9 +107,20 @@ typedef enum { // parser.h typedef enum { - IOU, GIOU, MSE + IOU, GIOU, MSE, DIOU, CIOU } IOU_LOSS; +// parser.h +typedef enum { + DEFAULT_NMS, GREEDY_NMS, DIOU_NMS +} NMS_KIND; + +// parser.h +typedef enum { + YOLO_CENTER, YOLO_LEFT_TOP, YOLO_RIGHT_BOTTOM +} YOLO_POINT; + + // image.h typedef enum{ PNG, BMP, TGA, JPG @@ -334,6 +345,9 @@ struct layer { float iou_normalizer; float cls_normalizer; IOU_LOSS iou_loss; + NMS_KIND nms_kind; + float beta_nms; + YOLO_POINT yolo_point; char *align_bit_weights_gpu; float *mean_arr_gpu; @@ -719,7 +733,7 @@ typedef struct dxrep { // box.h typedef struct ious { - float iou, giou; + float iou, giou, diou, ciou; dxrep dx_iou; dxrep dx_giou; } ious; @@ -835,6 +849,7 @@ LIB_API load_args get_base_args(network *net); // box.h LIB_API void do_nms_sort(detection *dets, int total, int classes, float thresh); LIB_API void do_nms_obj(detection *dets, int total, int classes, float thresh); +LIB_API void diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIND nms_kind, float beta1); // network.h LIB_API float *network_predict(network net, float *input); diff --git a/src/box.c b/src/box.c index cb28ce8e4d7..e700dc7140c 100644 --- a/src/box.c +++ b/src/box.c @@ -1,8 +1,12 @@ -#include "box.h" +#include "box.h" #include #include #include +#ifndef M_PI +#define M_PI 3.141592 +#endif + box float_to_box(float *f) { box b; @@ -23,6 +27,23 @@ box float_to_box_stride(float *f, int stride) return b; } + +dbox derivative(box a, box b) +{ + dbox d; + d.dx = 0; + d.dw = 0; + d.dy = 0; + d.dh = 0; + d.dx = a.x < b.x ? 1.0 : -1.0; + d.dy = a.y < b.y ? 1.0 : -1.0; + d.dw = a.w < b.w ? 1.0 : -1.0; + d.dh = a.h < b.h ? 
1.0 : -1.0; + return d; +} + + +/* dbox derivative(box a, box b) { dbox d; @@ -73,6 +94,7 @@ dbox derivative(box a, box b) } return d; } +*/ // where c is the smallest box that fully encompases a and b boxabs box_c(box a, box b) { @@ -155,13 +177,75 @@ float box_giou(box a, box b) return iou - giou_term; } +float box_diou(box a, box b) +{ + boxabs ba = box_c(a, b); + float w = ba.right - ba.left; + float h = ba.bot - ba.top; + float c = w * w + h * h; + float iou = box_iou(a, b); + if (c == 0) { + return iou; + } + float d = (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); + float u = pow(d / c, 0.6); + float diou_term = u; +#ifdef DEBUG_PRINTS + printf(" c: %f, u: %f, riou_term: %f\n", c, u, diou_term); +#endif + return iou - diou_term; +} + +float box_diounms(box a, box b, float beta1) +{ + boxabs ba = box_c(a, b); + float w = ba.right - ba.left; + float h = ba.bot - ba.top; + float c = w * w + h * h; + float iou = box_iou(a, b); + if (c == 0) { + return iou; + } + float d = (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); + float u = pow(d / c, beta1); + float diou_term = u; +#ifdef DEBUG_PRINTS + printf(" c: %f, u: %f, riou_term: %f\n", c, u, diou_term); +#endif + return iou - diou_term; +} + +float box_ciou(box a, box b) +{ + boxabs ba = box_c(a, b); + float w = ba.right - ba.left; + float h = ba.bot - ba.top; + float c = w * w + h * h; + float iou = box_iou(a, b); + if (c == 0) { + return iou; + } + float u = (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); + float d = u / c; + float ar_gt = b.w / b.h; + float ar_pred = a.w / a.h; + float ar_loss = 4 / (M_PI * M_PI) * (atan(ar_gt) - atan(ar_pred)) * (atan(ar_gt) - atan(ar_pred)); + float alpha = ar_loss / (1 - iou + ar_loss + 0.000001); + float ciou_term = d + alpha * ar_loss; //ciou +#ifdef DEBUG_PRINTS + printf(" c: %f, u: %f, riou_term: %f\n", c, u, ciou_term); +#endif + return iou - ciou_term; +} + dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { - boxabs pred_tblr = to_tblr(pred); + boxabs pred_tblr = to_tblr(pred); float pred_t = fmin(pred_tblr.top, pred_tblr.bot); float pred_b = fmax(pred_tblr.top, pred_tblr.bot); float pred_l = fmin(pred_tblr.left, pred_tblr.right); float pred_r = fmax(pred_tblr.left, pred_tblr.right); - + //dbox dover = derivative(pred,truth); + //dbox diouu = diou(pred, truth); boxabs truth_tblr = to_tblr(truth); #ifdef DEBUG_PRINTS printf("\niou: %f, giou: %f\n", box_iou(pred, truth), box_giou(pred, truth)); @@ -170,24 +254,39 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { #endif //printf("pred (t,b,l,r): (%f, %f, %f, %f)\n", pred_t, pred_b, pred_l, pred_r); //printf("trut (t,b,l,r): (%f, %f, %f, %f)\n", truth_tblr.top, truth_tblr.bot, truth_tblr.left, truth_tblr.right); - dxrep dx = { 0 }; + dxrep ddx = {0}; float X = (pred_b - pred_t) * (pred_r - pred_l); float Xhat = (truth_tblr.bot - truth_tblr.top) * (truth_tblr.right - truth_tblr.left); float Ih = fmin(pred_b, truth_tblr.bot) - fmax(pred_t, truth_tblr.top); float Iw = fmin(pred_r, truth_tblr.right) - fmax(pred_l, truth_tblr.left); float I = Iw * Ih; float U = X + Xhat - I; - - float Cw = fmax(pred_r, truth_tblr.right) - fmin(pred_l, truth_tblr.left); - float Ch = fmax(pred_b, truth_tblr.bot) - fmin(pred_t, truth_tblr.top); - float C = Cw * Ch; - - // float IoU = I / U; - // Partial Derivatives, derivatives + float S = (pred.x-truth.x)*(pred.x-truth.x)+(pred.y-truth.y)*(pred.y-truth.y); + float giou_Cw = fmax(pred_r, truth_tblr.right) - fmin(pred_l, truth_tblr.left); + float giou_Ch = fmax(pred_b, 
truth_tblr.bot) - fmin(pred_t, truth_tblr.top); + float giou_C = giou_Cw * giou_Ch; + //float IoU = I / U; +//#ifdef DEBUG_PRINTS + //printf("X: %f", X); + //printf(", Xhat: %f", Xhat); + //printf(", Ih: %f", Ih); + //printf(", Iw: %f", Iw); + //printf(", I: %f", I); + //printf(", U: %f", U); + //printf(", IoU: %f\n", I / U); +//#endif + + //Partial Derivatives, derivatives float dX_wrt_t = -1 * (pred_r - pred_l); float dX_wrt_b = pred_r - pred_l; float dX_wrt_l = -1 * (pred_b - pred_t); float dX_wrt_r = pred_b - pred_t; + // UNUSED + //// Ground truth + //float dXhat_wrt_t = -1 * (truth_tblr.right - truth_tblr.left); + //float dXhat_wrt_b = truth_tblr.right - truth_tblr.left; + //float dXhat_wrt_l = -1 * (truth_tblr.bot - truth_tblr.top); + //float dXhat_wrt_r = truth_tblr.bot - truth_tblr.top; // gradient of I min/max in IoU calc (prediction) float dI_wrt_t = pred_t > truth_tblr.top ? (-1 * Iw) : 0; @@ -200,42 +299,262 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { float dU_wrt_l = dX_wrt_l - dI_wrt_l; float dU_wrt_r = dX_wrt_r - dI_wrt_r; // gradient of C min/max in IoU calc (prediction) - float dC_wrt_t = pred_t < truth_tblr.top ? (-1 * Cw) : 0; - float dC_wrt_b = pred_b > truth_tblr.bot ? Cw : 0; - float dC_wrt_l = pred_l < truth_tblr.left ? (-1 * Ch) : 0; - float dC_wrt_r = pred_r > truth_tblr.right ? Ch : 0; + float dC_wrt_t = pred_t < truth_tblr.top ? (-1 * giou_Cw) : 0; + float dC_wrt_b = pred_b > truth_tblr.bot ? giou_Cw : 0; + float dC_wrt_l = pred_l < truth_tblr.left ? (-1 * giou_Ch) : 0; + float dC_wrt_r = pred_r > truth_tblr.right ? giou_Ch : 0; - // Final IOU loss (prediction) (negative of IOU gradient, we want the negative loss) float p_dt = 0; float p_db = 0; float p_dl = 0; float p_dr = 0; - if (U > 0) { - p_dt = ((U * dI_wrt_t) - (I * dU_wrt_t)) / (U * U); - p_db = ((U * dI_wrt_b) - (I * dU_wrt_b)) / (U * U); - p_dl = ((U * dI_wrt_l) - (I * dU_wrt_l)) / (U * U); - p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U); + if (U > 0 ) { + p_dt = ((U * dI_wrt_t) - (I * dU_wrt_t)) / (U * U); + p_db = ((U * dI_wrt_b) - (I * dU_wrt_b)) / (U * U); + p_dl = ((U * dI_wrt_l) - (I * dU_wrt_l)) / (U * U); + p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U); } + // apply grad from prediction min/max for correct corner selection + p_dt = pred_tblr.top < pred_tblr.bot ? p_dt : p_db; + p_db = pred_tblr.top < pred_tblr.bot ? p_db : p_dt; + p_dl = pred_tblr.left < pred_tblr.right ? p_dl : p_dr; + p_dr = pred_tblr.left < pred_tblr.right ? 
p_dr : p_dl; - // GIoU = I/U - (C-U)/C - // C is the smallest convex hull that encloses both Detection and Truth if (iou_loss == GIOU) { + if (giou_C > 0) { + // apply "C" term from gIOU + p_dt += ((giou_C * dU_wrt_t) - (U * dC_wrt_t)) / (giou_C * giou_C); + p_db += ((giou_C * dU_wrt_b) - (U * dC_wrt_b)) / (giou_C * giou_C); + p_dl += ((giou_C * dU_wrt_l) - (U * dC_wrt_l)) / (giou_C * giou_C); + p_dr += ((giou_C * dU_wrt_r) - (U * dC_wrt_r)) / (giou_C * giou_C); + } + if (Iw<=0||Ih<=0) { + p_dt = ((giou_C * dU_wrt_t) - (U * dC_wrt_t)) / (giou_C * giou_C); + p_db = ((giou_C * dU_wrt_b) - (U * dC_wrt_b)) / (giou_C * giou_C); + p_dl = ((giou_C * dU_wrt_l) - (U * dC_wrt_l)) / (giou_C * giou_C); + p_dr = ((giou_C * dU_wrt_r) - (U * dC_wrt_r)) / (giou_C * giou_C); + } + } + + float Ct = fmin(pred.y - pred.h / 2,truth.y - truth.h / 2); + float Cb = fmax(pred.y + pred.h / 2,truth.y + truth.h / 2); + float Cl = fmin(pred.x - pred.w / 2,truth.x - truth.w / 2); + float Cr = fmax(pred.x + pred.w / 2,truth.x + truth.w / 2); + float Cw = Cr - Cl; + float Ch = Cb - Ct; + float C = Cw * Cw + Ch * Ch; + + float dCt_dx = 0; + float dCt_dy = pred_t < truth_tblr.top ? 1 : 0; + float dCt_dw = 0; + float dCt_dh = pred_t < truth_tblr.top ? -0.5 : 0; + + float dCb_dx = 0; + float dCb_dy = pred_b > truth_tblr.bot ? 1 : 0; + float dCb_dw = 0; + float dCb_dh = pred_b > truth_tblr.bot ? 0.5: 0; + + float dCl_dx = pred_l < truth_tblr.left ? 1 : 0; + float dCl_dy = 0; + float dCl_dw = pred_l < truth_tblr.left ? -0.5 : 0; + float dCl_dh = 0; + + float dCr_dx = pred_r > truth_tblr.right ? 1 : 0; + float dCr_dy = 0; + float dCr_dw = pred_r > truth_tblr.right ? 0.5 : 0; + float dCr_dh = 0; + + float dCw_dx = dCr_dx - dCl_dx; + float dCw_dy = dCr_dy - dCl_dy; + float dCw_dw = dCr_dw - dCl_dw; + float dCw_dh = dCr_dh - dCl_dh; + + float dCh_dx = dCb_dx - dCt_dx; + float dCh_dy = dCb_dy - dCt_dy; + float dCh_dw = dCb_dw - dCt_dw; + float dCh_dh = dCb_dh - dCt_dh; + + // UNUSED + //// ground truth + //float dI_wrt_xhat_t = pred_t < truth_tblr.top ? (-1 * Iw) : 0; + //float dI_wrt_xhat_b = pred_b > truth_tblr.bot ? Iw : 0; + //float dI_wrt_xhat_l = pred_l < truth_tblr.left ? (-1 * Ih) : 0; + //float dI_wrt_xhat_r = pred_r > truth_tblr.right ? Ih : 0; + + // Final IOU loss (prediction) (negative of IOU gradient, we want the negative loss) + float p_dx = 0; + float p_dy = 0; + float p_dw = 0; + float p_dh = 0; + + p_dx = p_dl + p_dr; //p_dx, p_dy, p_dw and p_dh are the gradient of IoU or GIoU. + p_dy = p_dt + p_db; + p_dw = (p_dr - p_dl); //For dw and dh, we do not divided by 2. + p_dh = (p_db - p_dt); + if (iou_loss == DIOU) { + if (C > 0) { + p_dx += (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); + p_dy += (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); + p_dw += (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C); + p_dh += (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C); + } + if (Iw<=0||Ih<=0){ + p_dx = (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); + p_dy = (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); + p_dw = (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C); + p_dh = (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C); + } + } + //The following codes are calculating the gradient of ciou. 
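+    // Reference for the block below (see box_ciou() and https://arxiv.org/abs/1911.08287):
+    // the penalty being differentiated is R_CIoU = S/C + alpha*v, where
+    // S = (x - x_gt)^2 + (y - y_gt)^2 is the squared center distance,
+    // C = Cw*Cw + Ch*Ch is the squared diagonal of the smallest enclosing box,
+    // v = (4/pi^2) * (atan(w_gt/h_gt) - atan(w/h))^2 penalizes aspect-ratio mismatch,
+    // and alpha = v / ((1 - IoU) + v + 1e-6); the DIOU branch above uses only the S/C term.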
+ + if (iou_loss == CIOU) { + float ar_gt = truth.w / truth.h; + float ar_pred = pred.w / pred.h; + float ar_loss = 4 / (M_PI * M_PI) * (atan(ar_gt) - atan(ar_pred)) * (atan(ar_gt) - atan(ar_pred)); + float alpha = ar_loss / (1 - I/U + ar_loss + 0.000001); + float ar_dw=8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.h; + float ar_dh=-8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.w; if (C > 0) { - // apply "C" term from gIOU - p_dt += ((C * dU_wrt_t) - (U * dC_wrt_t)) / (C * C); - p_db += ((C * dU_wrt_b) - (U * dC_wrt_b)) / (C * C); - p_dl += ((C * dU_wrt_l) - (U * dC_wrt_l)) / (C * C); - p_dr += ((C * dU_wrt_r) - (U * dC_wrt_r)) / (C * C); + // dar* + p_dx += (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); + p_dy += (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); + p_dw += (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C) + alpha * ar_dw; + p_dh += (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C) + alpha * ar_dh; + } + if (Iw<=0||Ih<=0){ + p_dx = (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); + p_dy = (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); + p_dw = (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C) + alpha * ar_dw; + p_dh = (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C) + alpha * ar_dh; } } + ddx.dt = p_dx; //We follow the original code released from GDarknet. So in yolo_layer.c, dt, db, dl, dr are already dx, dy, dw, dh. + ddx.db = p_dy; + ddx.dl = p_dw; + ddx.dr = p_dh; + + // UNUSED + //// ground truth + //float gt_dt = ((U * dI_wrt_xhat_t) - (I * (dXhat_wrt_t - dI_wrt_xhat_t))) / (U * U); + //float gt_db = ((U * dI_wrt_xhat_b) - (I * (dXhat_wrt_b - dI_wrt_xhat_b))) / (U * U); + //float gt_dl = ((U * dI_wrt_xhat_l) - (I * (dXhat_wrt_l - dI_wrt_xhat_l))) / (U * U); + //float gt_dr = ((U * dI_wrt_xhat_r) - (I * (dXhat_wrt_r - dI_wrt_xhat_r))) / (U * U); + + // no min/max grad applied + //dx.dt = dt; + //dx.db = db; + //dx.dl = dl; + //dx.dr = dr; + + //// sum in gt -- THIS DOESNT WORK + //dx.dt += gt_dt; + //dx.db += gt_db; + //dx.dl += gt_dl; + //dx.dr += gt_dr; + + //// instead, look at the change between pred and gt, and weight t/b/l/r appropriately... + //// need the real derivative here (I think?) + //float delta_t = fmax(truth_tblr.top, pred_t) - fmin(truth_tblr.top, pred_t); + //float delta_b = fmax(truth_tblr.bot, pred_b) - fmin(truth_tblr.bot, pred_b); + //float delta_l = fmax(truth_tblr.left, pred_l) - fmin(truth_tblr.left, pred_l); + //float delta_r = fmax(truth_tblr.right, pred_r) - fmin(truth_tblr.right, pred_r); + + //dx.dt *= delta_t / (delta_t + delta_b); + //dx.db *= delta_b / (delta_t + delta_b); + //dx.dl *= delta_l / (delta_l + delta_r); + //dx.dr *= delta_r / (delta_l + delta_r); + + // UNUSED + //// ground truth + //float gt_dt = ((U * dI_wrt_xhat_t) - (I * (dXhat_wrt_t - dI_wrt_xhat_t))) / (U * U); + //float gt_db = ((U * dI_wrt_xhat_b) - (I * (dXhat_wrt_b - dI_wrt_xhat_b))) / (U * U); + //float gt_dl = ((U * dI_wrt_xhat_l) - (I * (dXhat_wrt_l - dI_wrt_xhat_l))) / (U * U); + //float gt_dr = ((U * dI_wrt_xhat_r) - (I * (dXhat_wrt_r - dI_wrt_xhat_r))) / (U * U); + + // no min/max grad applied + //dx.dt = dt; + //dx.db = db; + //dx.dl = dl; + //dx.dr = dr; + // apply grad from prediction min/max for correct corner selection - dx.dt = pred_tblr.top < pred_tblr.bot ? p_dt : p_db; - dx.db = pred_tblr.top < pred_tblr.bot ? p_db : p_dt; - dx.dl = pred_tblr.left < pred_tblr.right ? p_dl : p_dr; - dx.dr = pred_tblr.left < pred_tblr.right ? p_dr : p_dl; + //dx.dt = pred_tblr.top < pred_tblr.bot ? p_dt : p_db; + //dx.db = pred_tblr.top < pred_tblr.bot ? 
p_db : p_dt; + //dx.dl = pred_tblr.left < pred_tblr.right ? p_dl : p_dr; + //dx.dr = pred_tblr.left < pred_tblr.right ? p_dr : p_dl; + + //// sum in gt -- THIS DOESNT WORK + //dx.dt += gt_dt; + //dx.db += gt_db; + //dx.dl += gt_dl; + //dx.dr += gt_dr; + + //// instead, look at the change between pred and gt, and weight t/b/l/r appropriately... + //// need the real derivative here (I think?) + //float delta_t = fmax(truth_tblr.top, pred_t) - fmin(truth_tblr.top, pred_t); + //float delta_b = fmax(truth_tblr.bot, pred_b) - fmin(truth_tblr.bot, pred_b); + //float delta_l = fmax(truth_tblr.left, pred_l) - fmin(truth_tblr.left, pred_l); + //float delta_r = fmax(truth_tblr.right, pred_r) - fmin(truth_tblr.right, pred_r); + + //dx.dt *= delta_t / (delta_t + delta_b); + //dx.db *= delta_b / (delta_t + delta_b); + //dx.dl *= delta_l / (delta_l + delta_r); + //dx.dr *= delta_r / (delta_l + delta_r); + +//#ifdef DEBUG_PRINTS + /*printf(" directions dt: "); + if ((pred_tblr.top < truth_tblr.top && dx.dt > 0) || (pred_tblr.top > truth_tblr.top && dx.dt < 0)) { + printf("✓"); + } else { + printf("𝒙"); + } + printf(", "); + if ((pred_tblr.bot < truth_tblr.bot && dx.db > 0) || (pred_tblr.bot > truth_tblr.bot && dx.db < 0)) { + printf("✓"); + } else { + printf("𝒙"); + } + printf(", "); + if ((pred_tblr.left < truth_tblr.left && dx.dl > 0) || (pred_tblr.left > truth_tblr.left && dx.dl < 0)) { + printf("✓"); + } else { + printf("𝒙"); + } + printf(", "); + if ((pred_tblr.right < truth_tblr.right && dx.dr > 0) || (pred_tblr.right > truth_tblr.right && dx.dr < 0)) { + printf("✓"); + } else { + printf("𝒙"); + } + printf("\n"); + + printf("dx dt:%f", dx.dt); + printf(", db: %f", dx.db); + printf(", dl: %f", dx.dl); + printf(", dr: %f | ", dx.dr); +#endif - return dx; +#ifdef DEBUG_NAN + if (isnan(dx.dt)) { printf("dt isnan\n"); } + if (isnan(dx.db)) { printf("db isnan\n"); } + if (isnan(dx.dl)) { printf("dl isnan\n"); } + if (isnan(dx.dr)) { printf("dr isnan\n"); } +#endif + +// // No update if 0 or nan +// if (dx.dt == 0 || isnan(dx.dt)) { dx.dt = 1; } +// if (dx.db == 0 || isnan(dx.db)) { dx.db = 1; } +// if (dx.dl == 0 || isnan(dx.dl)) { dx.dl = 1; } +// if (dx.dr == 0 || isnan(dx.dr)) { dx.dr = 1; } +// +//#ifdef DEBUG_PRINTS +// printf("dx dt:%f (t: %f, p: %f)", dx.dt, gt_dt, p_dt); +// printf(", db: %f (t: %f, p: %f)", dx.db, gt_db, p_db); +// printf(", dl: %f (t: %f, p: %f)", dx.dl, gt_dl, p_dl); +// printf(", dr: %f (t: %f, p: %f) | ", dx.dr, gt_dr, p_dr); +//#endif */ + return ddx; } float box_rmse(box a, box b) @@ -351,13 +670,13 @@ void test_box() dbox diou(box a, box b) { - float u = box_union(a,b); - float i = box_intersection(a,b); - dbox di = dintersect(a,b); - dbox du = dunion(a,b); - dbox dd = {0,0,0,0}; + float u = box_union(a, b); + float i = box_intersection(a, b); + dbox di = dintersect(a, b); + dbox du = dunion(a, b); + dbox dd = { 0,0,0,0 }; - if(i <= 0 || 1) { + if (i <= 0 || 1) { dd.dx = b.x - a.x; dd.dy = b.y - a.y; dd.dw = b.w - a.w; @@ -365,10 +684,10 @@ dbox diou(box a, box b) return dd; } - dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u); - dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u); - dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u); - dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u); + dd.dx = (di.dx*u - du.dx*i) / (u*u); + dd.dy = (di.dy*u - du.dy*i) / (u*u); + dd.dw = (di.dw*u - du.dw*i) / (u*u); + dd.dh = (di.dh*u - du.dh*i) / (u*u); return dd; } @@ -524,6 +843,44 @@ void do_nms(box *boxes, float **probs, int total, int classes, float thresh) } } +void 
diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIND nms_kind, float beta1) +{ + int i, j, k; + k = total - 1; + for (i = 0; i <= k; ++i) { + if (dets[i].objectness == 0) { + detection swap = dets[i]; + dets[i] = dets[k]; + dets[k] = swap; + --k; + --i; + } + } + total = k + 1; + + for (k = 0; k < classes; ++k) { + for (i = 0; i < total; ++i) { + dets[i].sort_class = k; + } + qsort(dets, total, sizeof(detection), nms_comparator); + for (i = 0; i < total; ++i) { + if (dets[i].prob[k] == 0) continue; + box a = dets[i].bbox; + for (j = i + 1; j < total; ++j) { + box b = dets[j].bbox; + if (box_iou(a, b) > thresh && nms_kind == GREEDY_NMS) { + dets[j].prob[k] = 0; + } + else { + if (box_diounms(a, b, beta1) > thresh && nms_kind == DIOU_NMS) { + dets[j].prob[k] = 0; + } + } + } + } + } +} + box encode_box(box b, box anchor) { box encode; diff --git a/src/box.h b/src/box.h index 172c135293c..c7f0fb4394a 100644 --- a/src/box.h +++ b/src/box.h @@ -42,6 +42,7 @@ void do_nms(box *boxes, float **probs, int total, int classes, float thresh); void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh); //LIB_API void do_nms_sort(detection *dets, int total, int classes, float thresh); //LIB_API void do_nms_obj(detection *dets, int total, int classes, float thresh); +//LIB_API void diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIND nms_kind, float beta1); box decode_box(box b, box anchor); box encode_box(box b, box anchor); diff --git a/src/demo.c b/src/demo.c index 6c7f5d39848..63debde1a9d 100644 --- a/src/demo.c +++ b/src/demo.c @@ -213,7 +213,10 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int detection *local_dets = dets; //if (nms) do_nms_obj(local_dets, local_nboxes, l.classes, nms); // bad results - if (nms) do_nms_sort(local_dets, local_nboxes, l.classes, nms); + if (nms) { + if (l.nms_kind == DEFAULT_NMS) do_nms_sort(local_dets, local_nboxes, l.classes, nms); + else diounms_sort(local_dets, local_nboxes, l.classes, nms, l.nms_kind, l.beta_nms); + } //printf("\033[2J"); //printf("\033[1;1H"); diff --git a/src/detector.c b/src/detector.c index 8177343ef3d..feb86d5583f 100644 --- a/src/detector.c +++ b/src/detector.c @@ -243,7 +243,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i calc_map_for_each = fmax(calc_map_for_each, 100); int next_map_calc = iter_map + calc_map_for_each; next_map_calc = fmax(next_map_calc, net.burn_in); - next_map_calc = fmax(next_map_calc, 400); + //next_map_calc = fmax(next_map_calc, 400); if (calc_map) { printf("\n (next mAP calculation at %d iterations) ", next_map_calc); if (mean_average_precision > 0) printf("\n Last accuracy mAP@0.5 = %2.2f %%, best = %2.2f %% ", mean_average_precision * 100, best_map * 100); @@ -537,7 +537,10 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out int nboxes = 0; int letterbox = (args.type == LETTERBOX_DATA); detection *dets = get_network_boxes(&net, w, h, thresh, .5, map, 0, &nboxes, letterbox); - if (nms) do_nms_sort(dets, nboxes, classes, nms); + if (nms) { + if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); + else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); + } if (coco) { print_cocos(fp, path, dets, nboxes, classes, w, h); } @@ -800,7 +803,10 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa dets = get_network_boxes(&net, 1, 1, thresh, hier_thresh, 0, 0, &nboxes, letter_box); } 
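+            // nms_kind dispatch: DEFAULT_NMS keeps the classic greedy-IoU suppression (do_nms_sort),
+            // while other kinds go through diounms_sort(); with DIOU_NMS a box is suppressed by
+            // box_diounms(a, b, beta_nms), i.e. IoU reduced by the center-distance penalty
+            // (rho^2 / c^2)^beta_nms, with rho the center distance and c the enclosing-box diagonal.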
//detection *dets = get_network_boxes(&net, val[t].w, val[t].h, thresh, hier_thresh, 0, 1, &nboxes, letter_box); // for letter_box=1 - if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + if (nms) { + if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); + else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); + } //if (nms) do_nms_obj(dets, nboxes, l.classes, nms); char labelpath[4096]; @@ -1354,7 +1360,10 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam int nboxes = 0; detection *dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letter_box); - if (nms) do_nms_sort(dets, nboxes, l.classes, nms); + if (nms) { + if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); + else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); + } draw_detections_v3(im, dets, nboxes, thresh, names, alphabet, l.classes, ext_output); save_image(im, "predictions"); if (!dont_show) { diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 109eb522c3a..ae3ee648140 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -130,24 +130,43 @@ void resize_gaussian_yolo_layer(layer *l, int w, int h) #endif } -box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) +box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride, YOLO_POINT yolo_point) { box b; - b.x = (i + x[index + 0*stride]) / lw; - b.y = (j + x[index + 2*stride]) / lh; - b.w = exp(x[index + 4*stride]) * biases[2*n] / w; - b.h = exp(x[index + 6*stride]) * biases[2*n+1] / h; + + if (yolo_point == YOLO_CENTER) { + b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w; + b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h; + b.x = (i + x[index + 0 * stride]) / lw; + b.y = (j + x[index + 2 * stride]) / lh; + } + else if (yolo_point == YOLO_LEFT_TOP) { + b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w; + b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h; + b.x = (i + x[index + 0 * stride]) / lw + b.w/2; + b.y = (j + x[index + 2 * stride]) / lh + b.h/2; + } + else if (yolo_point == YOLO_RIGHT_BOTTOM) { + b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w; + b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h; + b.x = (i + x[index + 0 * stride]) / lw - b.w / 2; + b.y = (j + x[index + 2 * stride]) / lh - b.h / 2; + } + return b; } -float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate) +float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, + float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point) { - box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); + box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride, yolo_point); float iou; ious all_ious = { 0 }; all_ious.iou = box_iou(pred, truth); all_ious.giou = box_giou(pred, truth); + all_ious.diou = box_diou(pred, truth); + all_ious.ciou = box_ciou(pred, truth); if (pred.w == 0) { pred.w = 1.0; } if (pred.h == 0) { pred.h = 1.0; } @@ -158,10 +177,24 @@ float delta_gaussian_yolo_box(box truth, float 
*x, float *biases, int n, int ind iou = all_ious.iou; - float tx = (truth.x*lw - i); - float ty = (truth.y*lh - j); - float tw = log(truth.w*w / biases[2 * n]); - float th = log(truth.h*h / biases[2 * n + 1]); + float tx, ty, tw, th; + + tw = log(truth.w*w / biases[2 * n]); + th = log(truth.h*h / biases[2 * n + 1]); + + if (yolo_point == YOLO_CENTER) { + tx = (truth.x*lw - i); + ty = (truth.y*lh - j); + + } + else if (yolo_point == YOLO_LEFT_TOP) { + tx = ((truth.x - truth.w / 2)*lw - i); + ty = ((truth.y - truth.h / 2)*lh - j); + } + else if (yolo_point == YOLO_RIGHT_BOTTOM) { + tx = ((truth.x + truth.w / 2)*lw - i); + ty = ((truth.y + truth.h / 2)*lh - j); + } dx = (tx - x[index + 0 * stride]); dy = (ty - x[index + 2 * stride]); @@ -220,9 +253,24 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind // https://giou.stanford.edu/ all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); + float dx, dy; + + if (yolo_point == YOLO_CENTER) { + dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); + dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); + } + else if (yolo_point == YOLO_LEFT_TOP) { + dx = all_ious.dx_iou.dl; + dy = all_ious.dx_iou.dt; + } + else if (yolo_point == YOLO_RIGHT_BOTTOM) { + dx = all_ious.dx_iou.dr; + dy = all_ious.dx_iou.db; + } + // jacobian^t (transpose) - float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); - float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); + //float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); + //float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); @@ -360,7 +408,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) for (i = 0; i < l.w; ++i) { for (n = 0; n < l.n; ++n) { int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); - box pred = get_gaussian_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h); + box pred = get_gaussian_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h, l.yolo_point); float best_match_iou = 0; int best_match_t = 0; float best_iou = 0; @@ -405,7 +453,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1); + delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); } } } @@ -416,8 +464,22 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if(!truth.x) break; float best_iou = 0; int best_n = 0; - i = (truth.x * l.w); - j = (truth.y * l.h); + //i = (truth.x * l.w); + //j = (truth.y * l.h); + + if (l.yolo_point == YOLO_CENTER) { + i = (truth.x * l.w); + j = (truth.y * l.h); + } + else if (l.yolo_point == YOLO_LEFT_TOP) { + i = ((truth.x - truth.w / 2) * l.w); + j = ((truth.y - truth.h / 2) * l.h); + } + else if (l.yolo_point == YOLO_RIGHT_BOTTOM) { + i = ((truth.x + truth.w / 2) * l.w); + j 
= ((truth.y + truth.h / 2) * l.h); + } + box truth_shift = truth; truth_shift.x = truth_shift.y = 0; for(n = 0; n < l.total; ++n){ @@ -434,7 +496,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if(mask_n >= 0){ int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; @@ -465,7 +527,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if (iou > l.iou_thresh) { int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; @@ -671,7 +733,7 @@ int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, floa if (objectness > thresh) { int box_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 0); - dets[count].bbox = get_gaussian_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].bbox = get_gaussian_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h, l.yolo_point); dets[count].objectness = objectness; dets[count].classes = l.classes; diff --git a/src/parser.c b/src/parser.c index 3728c4422f9..e4e970695f0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -372,8 +372,21 @@ layer parse_yolo(list *options, size_params params) if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE; else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU; + else if (strcmp(iou_loss, "diou") == 0) l.iou_loss = DIOU; + else if (strcmp(iou_loss, "ciou") == 0) l.iou_loss = CIOU; else l.iou_loss = IOU; - fprintf(stderr, "[yolo] params: iou loss: %s, iou_norm: %2.2f, cls_norm: %2.2f, scale_x_y: %2.2f\n", (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? 
"giou" : "iou")), l.iou_normalizer, l.cls_normalizer, l.scale_x_y); + fprintf(stderr, "[yolo] params: iou loss: %s (%d), iou_norm: %2.2f, cls_norm: %2.2f, scale_x_y: %2.2f\n", + iou_loss, l.iou_loss, l.iou_normalizer, l.cls_normalizer, l.scale_x_y); + + l.beta_nms = option_find_float_quiet(options, "beta_nms", 0.6); + char *nms_kind = option_find_str(options, "nms_kind", "default"); + if (strcmp(nms_kind, "default") == 0) l.nms_kind = DEFAULT_NMS; + else { + if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS; + else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS; + else l.nms_kind = DEFAULT_NMS; + printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms); + } l.jitter = option_find_float(options, "jitter", .2); l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); @@ -451,8 +464,27 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE; else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU; + else if (strcmp(iou_loss, "diou") == 0) l.iou_loss = DIOU; + else if (strcmp(iou_loss, "ciou") == 0) l.iou_loss = CIOU; else l.iou_loss = IOU; - fprintf(stderr, "[Gaussian_yolo] iou loss: %s, iou_norm: %2.2f, cls_norm: %2.2f, scale: %2.2f\n", (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? "giou" : "iou")), l.iou_normalizer, l.cls_normalizer, l.scale_x_y); + + l.beta_nms = option_find_float_quiet(options, "beta_nms", 0.6); + char *nms_kind = option_find_str(options, "nms_kind", "default"); + if (strcmp(nms_kind, "default") == 0) l.nms_kind = DEFAULT_NMS; + else { + if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS; + else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS; + else l.nms_kind = DEFAULT_NMS; + printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms); + } + + char *yolo_point = option_find_str_quiet(options, "yolo_point", "center"); + if (strcmp(yolo_point, "left_top") == 0) l.yolo_point = YOLO_LEFT_TOP; + else if (strcmp(yolo_point, "right_bottom") == 0) l.yolo_point = YOLO_RIGHT_BOTTOM; + else l.yolo_point = YOLO_CENTER; + + fprintf(stderr, "[Gaussian_yolo] iou loss: %s (%d), iou_norm: %2.2f, cls_norm: %2.2f, scale: %2.2f, point: %d\n", + iou_loss, l.iou_loss, l.iou_normalizer, l.cls_normalizer, l.scale_x_y, l.yolo_point); l.jitter = option_find_float(options, "jitter", .2); diff --git a/src/utils.c b/src/utils.c index 4651cc0a1e8..af7cb9eaf01 100644 --- a/src/utils.c +++ b/src/utils.c @@ -916,4 +916,23 @@ int max_int_index(int *a, int n) } } return max_i; +} + +// Absolute box from relative coordinate bounding box and image size +boxabs box_to_boxabs(const box* b, const int img_w, const int img_h, const int bounds_check) +{ + boxabs ba; + ba.left = (b->x - b->w / 2.)*img_w; + ba.right = (b->x + b->w / 2.)*img_w; + ba.top = (b->y - b->h / 2.)*img_h; + ba.bot = (b->y + b->h / 2.)*img_h; + + if (bounds_check) { + if (ba.left < 0) ba.left = 0; + if (ba.right > img_w - 1) ba.right = img_w - 1; + if (ba.top < 0) ba.top = 0; + if (ba.bot > img_h - 1) ba.bot = img_h - 1; + } + + return ba; } \ No newline at end of file diff --git a/src/utils.h b/src/utils.h index fe4efe04ef1..998209067dd 100644 --- a/src/utils.h +++ b/src/utils.h @@ -80,6 +80,7 @@ int check_array_is_inf(float *arr, int size); int int_index(int *a, int val, int n); int *random_index_order(int min, int max); int max_int_index(int *a, int n); +boxabs box_to_boxabs(const box* b, const int img_w, const int img_h, const int bounds_check); #ifdef 
__cplusplus } diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 08577db5ab4..7a7ba6da56e 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -137,6 +137,8 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); all_ious.iou = box_iou(pred, truth); all_ious.giou = box_giou(pred, truth); + all_ious.diou = box_diou(pred, truth); + all_ious.ciou = box_ciou(pred, truth); // avoid nan in dx_box_iou if (pred.w == 0) { pred.w = 1.0; } if (pred.h == 0) { pred.h = 1.0; } @@ -289,8 +291,12 @@ void forward_yolo_layer(const layer l, network_state state) //float avg_iou = 0; float tot_iou = 0; float tot_giou = 0; + float tot_diou = 0; + float tot_ciou = 0; float tot_iou_loss = 0; float tot_giou_loss = 0; + float tot_diou_loss = 0; + float tot_ciou_loss = 0; float recall = 0; float recall75 = 0; float avg_cat = 0; @@ -392,6 +398,12 @@ void forward_yolo_layer(const layer l, network_state state) tot_giou += all_ious.giou; tot_giou_loss += 1 - all_ious.giou; + tot_diou += all_ious.diou; + tot_diou_loss += 1 - all_ious.diou; + + tot_ciou += all_ious.ciou; + tot_ciou_loss += 1 - all_ious.ciou; + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); avg_obj += l.output[obj_index]; l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); @@ -428,6 +440,12 @@ void forward_yolo_layer(const layer l, network_state state) tot_giou += all_ious.giou; tot_giou_loss += 1 - all_ious.giou; + tot_diou += all_ious.diou; + tot_diou_loss += 1 - all_ious.diou; + + tot_ciou += all_ious.ciou; + tot_ciou_loss += 1 - all_ious.ciou; + int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); avg_obj += l.output[obj_index]; l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); @@ -508,6 +526,60 @@ void backward_yolo_layer(const layer l, network_state state) axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); } +// Converts output of the network to detection boxes +// w,h: image width,height +// netw,neth: network width,height +// relative: 1 (all callers seems to pass TRUE) +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +{ + int i; + // network height (or width) + int new_w = 0; + // network height (or width) + int new_h = 0; + // Compute scale given image w,h vs network w,h + // I think this "rotates" the image to match network to input image w/h ratio + // new_h and new_w are really just network width and height + if (((float)netw / w) < ((float)neth / h)) { + new_w = netw; + new_h = (h * netw) / w; + } + else { + new_h = neth; + new_w = (w * neth) / h; + } + // difference between network width and "rotated" width + float deltaw = netw - new_w; + // difference between network height and "rotated" height + float deltah = neth - new_h; + // ratio between rotated network width and network width + float ratiow = (float)new_w / netw; + // ratio between rotated network width and network width + float ratioh = (float)new_h / neth; + for (i = 0; i < n; ++i) { + + box b = dets[i].bbox; + // x = ( x - (deltaw/2)/netw ) / ratiow; + // x - [(1/2 the difference of the network width and rotated width) / (network width)] + b.x = (b.x - deltaw / 2. / netw) / ratiow; + b.y = (b.y - deltah / 2. / neth) / ratioh; + // scale to match rotation of incoming image + b.w *= 1 / ratiow; + b.h *= 1 / ratioh; + + // relative seems to always be == 1, I don't think we hit this condition, ever. 
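+        // (If a caller ever passed relative == 0, this branch would convert the box from
+        // normalized [0..1] units to absolute pixel coordinates of the original w x h image.)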
+ if (!relative) { + b.x *= w; + b.w *= w; + b.y *= h; + b.h *= h; + } + + dets[i].bbox = b; + } +} + +/* void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter) { int i; @@ -542,6 +614,7 @@ void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth dets[i].bbox = b; } } +*/ int yolo_num_detections(layer l, float thresh) { From d43e09cdf24708b61cbd159822860dedbf756f1f Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 23 Nov 2019 16:44:45 +0300 Subject: [PATCH 82/86] Compile fix --- src/yolo_layer.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 7a7ba6da56e..81d2de709ae 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -530,7 +530,7 @@ void backward_yolo_layer(const layer l, network_state state) // w,h: image width,height // netw,neth: network width,height // relative: 1 (all callers seems to pass TRUE) -void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) +void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter) { int i; // network height (or width) @@ -540,13 +540,19 @@ void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth // Compute scale given image w,h vs network w,h // I think this "rotates" the image to match network to input image w/h ratio // new_h and new_w are really just network width and height - if (((float)netw / w) < ((float)neth / h)) { - new_w = netw; - new_h = (h * netw) / w; + if (letter) { + if (((float)netw / w) < ((float)neth / h)) { + new_w = netw; + new_h = (h * netw) / w; + } + else { + new_h = neth; + new_w = (w * neth) / h; + } } else { + new_w = netw; new_h = neth; - new_w = (w * neth) / h; } // difference between network width and "rotated" width float deltaw = netw - new_w; From 14212154d9340790634e0b5e664ae27802f45e38 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 23 Nov 2019 17:37:28 +0300 Subject: [PATCH 83/86] Minor compile fix and references to DIoU-darknet repo and paper --- src/box.c | 9 +++++++++ src/box.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/src/box.c b/src/box.c index e700dc7140c..0b3e9a92d14 100644 --- a/src/box.c +++ b/src/box.c @@ -177,6 +177,8 @@ float box_giou(box a, box b) return iou - giou_term; } +// https://github.com/Zzh-tju/DIoU-darknet +// https://arxiv.org/abs/1911.08287 float box_diou(box a, box b) { boxabs ba = box_c(a, b); @@ -215,6 +217,8 @@ float box_diounms(box a, box b, float beta1) return iou - diou_term; } +// https://github.com/Zzh-tju/DIoU-darknet +// https://arxiv.org/abs/1911.08287 float box_ciou(box a, box b) { boxabs ba = box_c(a, b); @@ -391,6 +395,9 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { p_dy = p_dt + p_db; p_dw = (p_dr - p_dl); //For dw and dh, we do not divided by 2. 
p_dh = (p_db - p_dt); + + // https://github.com/Zzh-tju/DIoU-darknet + // https://arxiv.org/abs/1911.08287 if (iou_loss == DIOU) { if (C > 0) { p_dx += (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); @@ -843,6 +850,8 @@ void do_nms(box *boxes, float **probs, int total, int classes, float thresh) } } +// https://github.com/Zzh-tju/DIoU-darknet +// https://arxiv.org/abs/1911.08287 void diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIND nms_kind, float beta1) { int i, j, k; diff --git a/src/box.h b/src/box.h index c7f0fb4394a..608a4c5869d 100644 --- a/src/box.h +++ b/src/box.h @@ -36,6 +36,8 @@ float box_iou(box a, box b); float box_rmse(box a, box b); dxrep dx_box_iou(box a, box b, IOU_LOSS iou_loss); float box_giou(box a, box b); +float box_diou(box a, box b); +float box_ciou(box a, box b); dbox diou(box a, box b); boxabs to_tblr(box a); void do_nms(box *boxes, float **probs, int total, int classes, float thresh); From 8cb3ee4e7956efbb4f858469256c9b139c511ab2 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sat, 23 Nov 2019 18:59:29 +0300 Subject: [PATCH 84/86] diounms_sort() fixed --- include/darknet.h | 5 +++-- src/box.c | 32 +++++++++++++++++++++++++++++--- src/gaussian_yolo_layer.c | 3 +++ src/parser.c | 1 + 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index 851b65421c8..20f87475b3d 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -112,12 +112,12 @@ typedef enum { // parser.h typedef enum { - DEFAULT_NMS, GREEDY_NMS, DIOU_NMS + DEFAULT_NMS, GREEDY_NMS, DIOU_NMS, CORNERS_NMS } NMS_KIND; // parser.h typedef enum { - YOLO_CENTER, YOLO_LEFT_TOP, YOLO_RIGHT_BOTTOM + YOLO_CENTER = 1 << 0, YOLO_LEFT_TOP = 1 << 1, YOLO_RIGHT_BOTTOM = 1 << 2 } YOLO_POINT; @@ -748,6 +748,7 @@ typedef struct detection{ float objectness; int sort_class; float *uc; // Gaussian_YOLOv3 - tx,ty,tw,th uncertainty + int points; // bit-0 - center, bit-1 - top-left-corner, bit-2 - bottom-right-corner } detection; // matrix.h diff --git a/src/box.c b/src/box.c index 0b3e9a92d14..5e39a2548fd 100644 --- a/src/box.c +++ b/src/box.c @@ -871,13 +871,36 @@ void diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIN for (i = 0; i < total; ++i) { dets[i].sort_class = k; } - qsort(dets, total, sizeof(detection), nms_comparator); - for (i = 0; i < total; ++i) { + qsort(dets, total, sizeof(detection), nms_comparator_v3); + for (i = 0; i < total; ++i) + { if (dets[i].prob[k] == 0) continue; box a = dets[i].bbox; for (j = i + 1; j < total; ++j) { box b = dets[j].bbox; - if (box_iou(a, b) > thresh && nms_kind == GREEDY_NMS) { + if (box_iou(a, b) > thresh && nms_kind == CORNERS_NMS) + { + float sum_prob = pow(dets[i].prob[k], 2) + pow(dets[j].prob[k], 2); + float alpha_prob = pow(dets[i].prob[k], 2) / sum_prob; + float beta_prob = pow(dets[j].prob[k], 2) / sum_prob; + //dets[i].bbox.x = (dets[i].bbox.x*alpha_prob + dets[j].bbox.x*beta_prob); + //dets[i].bbox.y = (dets[i].bbox.y*alpha_prob + dets[j].bbox.y*beta_prob); + //dets[i].bbox.w = (dets[i].bbox.w*alpha_prob + dets[j].bbox.w*beta_prob); + //dets[i].bbox.h = (dets[i].bbox.h*alpha_prob + dets[j].bbox.h*beta_prob); + /* + if (dets[j].points == YOLO_CENTER && (dets[i].points & dets[j].points) == 0) { + dets[i].bbox.x = (dets[i].bbox.x*alpha_prob + dets[j].bbox.x*beta_prob); + dets[i].bbox.y = (dets[i].bbox.y*alpha_prob + dets[j].bbox.y*beta_prob); + } + else if ((dets[i].points & dets[j].points) == 0) { + dets[i].bbox.w = (dets[i].bbox.w*alpha_prob + 
From 8cb3ee4e7956efbb4f858469256c9b139c511ab2 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sat, 23 Nov 2019 18:59:29 +0300
Subject: [PATCH 84/86] diounms_sort() fixed

---
 include/darknet.h         |  5 +++--
 src/box.c                 | 32 +++++++++++++++++++++++++++++---
 src/gaussian_yolo_layer.c |  3 +++
 src/parser.c              |  1 +
 4 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index 851b65421c8..20f87475b3d 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -112,12 +112,12 @@ typedef enum {

 // parser.h
 typedef enum {
-    DEFAULT_NMS, GREEDY_NMS, DIOU_NMS
+    DEFAULT_NMS, GREEDY_NMS, DIOU_NMS, CORNERS_NMS
 } NMS_KIND;

 // parser.h
 typedef enum {
-    YOLO_CENTER, YOLO_LEFT_TOP, YOLO_RIGHT_BOTTOM
+    YOLO_CENTER = 1 << 0, YOLO_LEFT_TOP = 1 << 1, YOLO_RIGHT_BOTTOM = 1 << 2
 } YOLO_POINT;


@@ -748,6 +748,7 @@ typedef struct detection{
     float objectness;
     int sort_class;
     float *uc; // Gaussian_YOLOv3 - tx,ty,tw,th uncertainty
+    int points; // bit-0 - center, bit-1 - top-left-corner, bit-2 - bottom-right-corner
 } detection;

 // matrix.h
diff --git a/src/box.c b/src/box.c
index 0b3e9a92d14..5e39a2548fd 100644
--- a/src/box.c
+++ b/src/box.c
@@ -871,13 +871,36 @@ void diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIN
     for (i = 0; i < total; ++i) {
         dets[i].sort_class = k;
     }
-    qsort(dets, total, sizeof(detection), nms_comparator);
-    for (i = 0; i < total; ++i) {
+    qsort(dets, total, sizeof(detection), nms_comparator_v3);
+    for (i = 0; i < total; ++i)
+    {
         if (dets[i].prob[k] == 0) continue;
         box a = dets[i].bbox;
         for (j = i + 1; j < total; ++j) {
             box b = dets[j].bbox;
-            if (box_iou(a, b) > thresh && nms_kind == GREEDY_NMS) {
+            if (box_iou(a, b) > thresh && nms_kind == CORNERS_NMS)
+            {
+                float sum_prob = pow(dets[i].prob[k], 2) + pow(dets[j].prob[k], 2);
+                float alpha_prob = pow(dets[i].prob[k], 2) / sum_prob;
+                float beta_prob = pow(dets[j].prob[k], 2) / sum_prob;
+                //dets[i].bbox.x = (dets[i].bbox.x*alpha_prob + dets[j].bbox.x*beta_prob);
+                //dets[i].bbox.y = (dets[i].bbox.y*alpha_prob + dets[j].bbox.y*beta_prob);
+                //dets[i].bbox.w = (dets[i].bbox.w*alpha_prob + dets[j].bbox.w*beta_prob);
+                //dets[i].bbox.h = (dets[i].bbox.h*alpha_prob + dets[j].bbox.h*beta_prob);
+                /*
+                if (dets[j].points == YOLO_CENTER && (dets[i].points & dets[j].points) == 0) {
+                    dets[i].bbox.x = (dets[i].bbox.x*alpha_prob + dets[j].bbox.x*beta_prob);
+                    dets[i].bbox.y = (dets[i].bbox.y*alpha_prob + dets[j].bbox.y*beta_prob);
+                }
+                else if ((dets[i].points & dets[j].points) == 0) {
+                    dets[i].bbox.w = (dets[i].bbox.w*alpha_prob + dets[j].bbox.w*beta_prob);
+                    dets[i].bbox.h = (dets[i].bbox.h*alpha_prob + dets[j].bbox.h*beta_prob);
+                }
+                dets[i].points |= dets[j].points;
+                */
+                dets[j].prob[k] = 0;
+            }
+            else if (box_iou(a, b) > thresh && nms_kind == GREEDY_NMS) {
                 dets[j].prob[k] = 0;
             }
             else {
@@ -886,6 +909,9 @@ void diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIN
                 }
             }
         }
+
+        //if ((nms_kind == CORNERS_NMS) && (dets[i].points != (YOLO_CENTER | YOLO_LEFT_TOP | YOLO_RIGHT_BOTTOM)))
+        //    dets[i].prob[k] = 0;
     }
 }
 }
diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c
index ae3ee648140..2e663651b0b 100644
--- a/src/gaussian_yolo_layer.c
+++ b/src/gaussian_yolo_layer.c
@@ -742,6 +742,9 @@ int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, floa
             dets[count].uc[2] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 5)]; // tw uncertainty
             dets[count].uc[3] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 7)]; // th uncertainty

+            dets[count].points = l.yolo_point;
+            //if (l.yolo_point != YOLO_CENTER) dets[count].objectness = objectness = 0;
+
             for (j = 0; j < l.classes; ++j) {
                 int class_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 9 + j);
                 float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3]) / 4.0;
diff --git a/src/parser.c b/src/parser.c
index e4e970695f0..f7086060483 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -474,6 +474,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
     else {
         if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS;
         else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS;
+        else if (strcmp(nms_kind, "cornersnms") == 0) l.nms_kind = CORNERS_NMS;
         else l.nms_kind = DEFAULT_NMS;
         printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms);
     }
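PATCH 84's `diounms_sort()` keeps darknet's usual greedy convention: detections are sorted per class by probability, and every lower-ranked box whose IoU with a kept box exceeds `thresh` has its class probability zeroed rather than being removed from the array (the new CORNERS_NMS branch currently suppresses the same way, with the probability-weighted box merge left commented out). A minimal sketch of that loop with toy types (`det_t` and `greedy_nms` are illustrative names, not the repository's):

```c
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct { float x, y, w, h, prob; } det_t;

/* plain IoU between two center-format boxes */
static float iou(det_t a, det_t b)
{
    float l = fmaxf(a.x - a.w / 2, b.x - b.w / 2);
    float r = fminf(a.x + a.w / 2, b.x + b.w / 2);
    float t = fmaxf(a.y - a.h / 2, b.y - b.h / 2);
    float bo = fminf(a.y + a.h / 2, b.y + b.h / 2);
    float inter = fmaxf(0, r - l) * fmaxf(0, bo - t);
    float uni = a.w * a.h + b.w * b.h - inter;
    return uni > 0 ? inter / uni : 0;
}

/* sort by probability, highest first */
static int by_prob_desc(const void *pa, const void *pb)
{
    float d = ((const det_t *)pb)->prob - ((const det_t *)pa)->prob;
    return (d > 0) - (d < 0);
}

/* greedy NMS: zero the probability of suppressed boxes, as the patch does */
static void greedy_nms(det_t *dets, int n, float thresh)
{
    qsort(dets, n, sizeof(det_t), by_prob_desc);
    for (int i = 0; i < n; ++i) {
        if (dets[i].prob == 0) continue;
        for (int j = i + 1; j < n; ++j)
            if (iou(dets[i], dets[j]) > thresh) dets[j].prob = 0;
    }
}

int main(void)
{
    det_t d[3] = { {0.5f, 0.5f, 0.4f, 0.4f, 0.9f},
                   {0.52f, 0.5f, 0.4f, 0.4f, 0.6f},
                   {0.1f, 0.1f, 0.2f, 0.2f, 0.8f} };
    greedy_nms(d, 3, 0.45f);
    for (int i = 0; i < 3; ++i) printf("prob[%d] = %.2f\n", i, d[i].prob);
    return 0;
}
```

Zeroing the probability instead of compacting the array keeps indices into `dets` stable for callers, which is why the patch writes `dets[j].prob[k] = 0` in both suppression branches.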
From 61f8f569b765f12caf1801745180024e0a0903ac Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sat, 23 Nov 2019 19:45:14 +0300
Subject: [PATCH 85/86] CIOU and DIOU fix

---
 src/gaussian_yolo_layer.c | 37 +++++++++++++++++++------------------
 src/yolo_layer.c          | 14 ++++++++++----
 2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c
index 2e663651b0b..0fe8a5e16f5 100644
--- a/src/gaussian_yolo_layer.c
+++ b/src/gaussian_yolo_layer.c
@@ -134,21 +134,18 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int
 {
     box b;

+    b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w;
+    b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h;
+
     if (yolo_point == YOLO_CENTER) {
-        b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w;
-        b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h;
         b.x = (i + x[index + 0 * stride]) / lw;
         b.y = (j + x[index + 2 * stride]) / lh;
     }
     else if (yolo_point == YOLO_LEFT_TOP) {
-        b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w;
-        b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h;
-        b.x = (i + x[index + 0 * stride]) / lw + b.w/2;
-        b.y = (j + x[index + 2 * stride]) / lh + b.h/2;
+        b.x = (i + x[index + 0 * stride]) / lw + b.w / 2;
+        b.y = (j + x[index + 2 * stride]) / lh + b.h / 2;
     }
     else if (yolo_point == YOLO_RIGHT_BOTTOM) {
-        b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w;
-        b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h;
         b.x = (i + x[index + 0 * stride]) / lw - b.w / 2;
         b.y = (j + x[index + 2 * stride]) / lh - b.h / 2;
     }
@@ -185,7 +182,6 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind
     if (yolo_point == YOLO_CENTER) {
         tx = (truth.x*lw - i);
         ty = (truth.y*lh - j);
-
     }
     else if (yolo_point == YOLO_LEFT_TOP) {
         tx = ((truth.x - truth.w / 2)*lw - i);
@@ -251,28 +247,33 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind
     // https://github.com/generalized-iou/g-darknet
     // https://arxiv.org/abs/1902.09630v2
     // https://giou.stanford.edu/
+    // https://arxiv.org/abs/1911.08287v1
+    // https://github.com/Zzh-tju/DIoU-darknet
     all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss);

-    float dx, dy;
+    float dx, dy, dw, dh;
+
+    dx = all_ious.dx_iou.dt;
+    dy = all_ious.dx_iou.db;
+    dw = all_ious.dx_iou.dl;
+    dh = all_ious.dx_iou.dr;

     if (yolo_point == YOLO_CENTER) {
-        dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr);
-        dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db);
     }
     else if (yolo_point == YOLO_LEFT_TOP) {
-        dx = all_ious.dx_iou.dl;
-        dy = all_ious.dx_iou.dt;
+        dx = dx - dw/2;
+        dy = dy - dh/2;
     }
     else if (yolo_point == YOLO_RIGHT_BOTTOM) {
-        dx = all_ious.dx_iou.dr;
-        dy = all_ious.dx_iou.db;
+        dx = dx + dw / 2;
+        dy = dy + dh / 2;
     }

     // jacobian^t (transpose)
     //float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr);
     //float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db);
-    float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr));
-    float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db));
+    //float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr));
+    //float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db));

     // predict exponential, apply gradient of e^delta_t ONLY for w,h
     dw *= exp(x[index + 4 * stride]);
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index 81d2de709ae..40e73878a75 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -162,10 +162,16 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
     all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss);

     // jacobian^t (transpose)
-    float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr);
-    float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db);
-    float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr));
-    float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db));
+    //float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr);
+    //float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db);
+    //float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr));
+    //float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db));
+
+    // jacobian^t (transpose)
+    float dx = all_ious.dx_iou.dt;
+    float dy = all_ious.dx_iou.db;
+    float dw = all_ious.dx_iou.dl;
+    float dh = all_ious.dx_iou.dr;

     // predict exponential, apply gradient of e^delta_t ONLY for w,h
     dw *= exp(x[index + 2 * stride]);
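The commented-out "jacobian" lines in PATCH 85 encode a simple chain rule: with box edges l = x - w/2 and r = x + w/2, a gradient taken with respect to the edges converts to the center format as dL/dx = dL/dl + dL/dr and dL/dw = -0.5*dL/dl + 0.5*dL/dr (and likewise t/b against y/h). The patch instead assigns the four `dxrep` fields straight to dx/dy/dw/dh and applies the per-`yolo_point` half-size shifts afterwards, which suggests `dx_box_iou()` now fills those fields with (x, y, w, h) gradients despite the t/b/l/r names. A toy numerical check of the old edge-to-center mapping (`toy_loss` is an arbitrary illustrative function, not an IoU):

```c
#include <stdio.h>

/* Hypothetical toy loss over the box edges: L(l, r) = l^2 + 2r,
 * written as a function of the center-format parameters (x, w). */
static float toy_loss(float x, float w)
{
    float l = x - w / 2, r = x + w / 2;
    return l * l + 2 * r;
}

int main(void)
{
    float x = 1.0f, w = 0.5f, eps = 1e-3f;
    float l = x - w / 2;
    float dLdl = 2 * l, dLdr = 2;            /* analytic edge gradients   */
    float dLdx = dLdl + dLdr;                /* chain rule, center x      */
    float dLdw = -0.5f * dLdl + 0.5f * dLdr; /* chain rule, width         */
    /* central finite differences on the center-format parameters */
    float num_dx = (toy_loss(x + eps, w) - toy_loss(x - eps, w)) / (2 * eps);
    float num_dw = (toy_loss(x, w + eps) - toy_loss(x, w - eps)) / (2 * eps);
    printf("dL/dx: analytic %.4f vs numeric %.4f\n", dLdx, num_dx);
    printf("dL/dw: analytic %.4f vs numeric %.4f\n", dLdw, num_dw);
    return 0;
}
```

Both pairs agree (3.5 and 0.25 for these values), confirming the mapping the old code used; the same check applied to the new direct assignment would only pass if the gradient source already works in center coordinates.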
From 3abbd858084c9f3634a30307f36a0d23303796b6 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sat, 23 Nov 2019 20:00:35 +0300
Subject: [PATCH 86/86] fixed consistency between darknet and python code

---
 build/darknet/x64/darknet.py | 3 ++-
 darknet.py                   | 3 ++-
 src/gaussian_yolo_layer.c    | 8 ++++----
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/build/darknet/x64/darknet.py b/build/darknet/x64/darknet.py
index 5cfb26ba16e..55afcb0ea42 100644
--- a/build/darknet/x64/darknet.py
+++ b/build/darknet/x64/darknet.py
@@ -60,7 +60,8 @@ class DETECTION(Structure):
                 ("mask", POINTER(c_float)),
                 ("objectness", c_float),
                 ("sort_class", c_int),
-                ("uc", POINTER(c_float))]
+                ("uc", POINTER(c_float)),
+                ("points", c_int)]


 class IMAGE(Structure):
diff --git a/darknet.py b/darknet.py
index 5cfb26ba16e..55afcb0ea42 100644
--- a/darknet.py
+++ b/darknet.py
@@ -60,7 +60,8 @@ class DETECTION(Structure):
                 ("mask", POINTER(c_float)),
                 ("objectness", c_float),
                 ("sort_class", c_int),
-                ("uc", POINTER(c_float))]
+                ("uc", POINTER(c_float)),
+                ("points", c_int)]


 class IMAGE(Structure):
diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c
index 0fe8a5e16f5..d179ddd83d5 100644
--- a/src/gaussian_yolo_layer.c
+++ b/src/gaussian_yolo_layer.c
@@ -136,10 +136,10 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int
     b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w;
     b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h;

+    b.x = (i + x[index + 0 * stride]) / lw;
+    b.y = (j + x[index + 2 * stride]) / lh;
     if (yolo_point == YOLO_CENTER) {
-        b.x = (i + x[index + 0 * stride]) / lw;
-        b.y = (j + x[index + 2 * stride]) / lh;
     }
     else if (yolo_point == YOLO_LEFT_TOP) {
         b.x = (i + x[index + 0 * stride]) / lw + b.w / 2;
@@ -176,12 +176,12 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind

     float tx, ty, tw, th;

+    tx = (truth.x*lw - i);
+    ty = (truth.y*lh - j);
     tw = log(truth.w*w / biases[2 * n]);
     th = log(truth.h*h / biases[2 * n + 1]);

     if (yolo_point == YOLO_CENTER) {
-        tx = (truth.x*lw - i);
-        ty = (truth.y*lh - j);
     }
     else if (yolo_point == YOLO_LEFT_TOP) {
         tx = ((truth.x - truth.w / 2)*lw - i);