Skip to content

Commit

Permalink
Improve performance of color distance calculations by kernel fusion (#…
Browse files Browse the repository at this point in the history
…809)

This MR only fuses many separate kernels into a single elementwise kernel for each of the "color distance" functions. This is expected to result in a substantial performance improvement, both from reduced kernel launch overhead and because a single pass through the image is much more memory efficient than many separate kernel calls.

No change in behavior is expected (existing tests must continue to pass).

## Benchmark Results

I added benchmarks for these functions and compared the results before and after this change.

The acceleration values are the relative speedup as compared to the scikit-image implementation

### For a pair of 32-bit LAB images of shape: (512, 512, 3)

function         | acceleration (old) | acceleration (new)
-----------------|--------------------|-------------------
deltaE_cie76     |   5.49             |   21.85
deltaE_ciede94   |   37.60            |   65.09
deltaE_ciede2000 |   12.54            |  109.71
deltaE_cmc       |   27.18            |   89.23

### For a pair of 32-bit LAB images of shape: (3840, 2160, 3)

function         | acceleration (old) | acceleration (new)
-----------------|--------------------|-------------------
deltaE_cie76     |   31.12            |  250.74
deltaE_ciede94   |   79.15            |  117.50
deltaE_ciede2000 |   71.36            |  132.73
deltaE_cmc       |   70.65            |  154.83

Authors:
  - Gregory Lee (https://github.com/grlee77)

Approvers:
  - Gigon Bae (https://github.com/gigony)

URL: #809
  • Loading branch information
grlee77 authored Jan 10, 2025
1 parent cc1b25f commit 8fe097e
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 119 deletions.
48 changes: 48 additions & 0 deletions benchmarks/skimage/cucim_color_bench.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import math
import os
import pickle

Expand All @@ -25,6 +26,10 @@
"lab2xyz",
"rgba2rgb",
"label2rgb",
"deltaE_cie76",
"deltaE_ciede94",
"deltaE_ciede2000",
"deltaE_cmc",
]


Expand All @@ -40,6 +45,32 @@ def set_args(self, dtype):
self.args_gpu = (imaged,)


class DeltaEBench(ImageBench):
    """Benchmark case whose arguments are a pair of same-shaped LAB images."""

    def set_args(self, dtype):
        """Build the CPU and GPU argument tuples for a two-image benchmark.

        A LAB image pair is derived from scikit-image's ``astronaut`` sample:
        the first from the image itself, the second from a copy rolled by one
        element along each of the first two axes. Both are cast to ``dtype``
        and then tiled and cropped so that their first two dimensions equal
        ``self.shape[0]`` and ``self.shape[1]``.

        Parameters
        ----------
        dtype
            Data type passed to ``ndarray.astype`` for both images.
        """
        from skimage import color, data

        def _to_sized_lab(rgb):
            # RGB -> LAB in the requested dtype, repeated often enough to
            # cover the target extent and then cut back to exactly that size.
            lab = color.rgb2lab(rgb).astype(dtype, copy=False)
            repeats = (
                math.ceil(self.shape[0] / lab.shape[0]),
                math.ceil(self.shape[1] / lab.shape[1]),
                1,
            )
            tiled = np.tile(lab, repeats)
            return tiled[: self.shape[0], : self.shape[1], :]

        base_rgb = data.astronaut()
        # The shifted copy ensures the two images are not identical.
        shifted_rgb = np.roll(base_rgb, (1, 1), axis=(0, 1))

        lab1 = _to_sized_lab(base_rgb)
        lab2 = _to_sized_lab(shifted_rgb)

        print(f"{lab1.shape=}")
        self.args_cpu = (lab1, lab2)
        # Same pair converted via ``cp.asarray`` for the GPU module.
        self.args_gpu = tuple(cp.asarray(image) for image in self.args_cpu)


class RGBABench(ImageBench):
def set_args(self, dtype):
if self.shape[-1] != 4:
Expand Down Expand Up @@ -162,6 +193,23 @@ def main(args):
results = B.run_benchmark(duration=args.duration)
all_results = pd.concat([all_results, results["full"]])

elif function_name.startswith("deltaE"):
# only run these functions for floating point data types
float_dtypes = [t for t in dtypes if np.dtype(t).kind == "f"]

B = DeltaEBench(
function_name=function_name,
shape=shape + (3,),
dtypes=float_dtypes,
fixed_kwargs={},
var_kwargs={},
# index_str=f"{fromspace.lower()}2{tospace.lower()}",
module_cpu=skimage.color,
module_gpu=cucim.skimage.color,
run_cpu=run_cpu,
)
results = B.run_benchmark(duration=args.duration)
all_results = pd.concat([all_results, results["full"]])
elif function_name == "rgba2rgb":
B = RGBABench(
function_name="rgba2rgb",
Expand Down
11 changes: 11 additions & 0 deletions benchmarks/skimage/run-nv-bench-color.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,14 @@ for shape in "${param_shape[@]}"; do
done
done
done

# Run the deltaE_* benchmarks for every shape / function / dtype combination.
param_shape=(512,512 3840,2160)
param_filt=(deltaE_cie76 deltaE_ciede94 deltaE_ciede2000 deltaE_cmc)
# Bash array elements are separated by whitespace only; a comma written after
# an element becomes part of that element (i.e. "float32," instead of "float32").
param_dt=(float32 float64)
for shape in "${param_shape[@]}"; do
    for filt in "${param_filt[@]}"; do
        for dt in "${param_dt[@]}"; do
            # Quote expansions so each value is passed as exactly one argument.
            python cucim_color_bench.py -f "$filt" -i "$shape" -d "$dt" -t 10
        done
    done
done
Loading

0 comments on commit 8fe097e

Please sign in to comment.