From 805178d0e38471f9f064ef16aa2c6a1c032f59ee Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 25 Jul 2024 11:33:28 +0100 Subject: [PATCH 1/3] Refactor how we deal with pixel cache in compute_fixed_resolution_buffer to speed up identical pixel transformations across layers (e.g. when subsets are present) --- glue/core/fixed_resolution_buffer.py | 53 +++++++++++++++++++--------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/glue/core/fixed_resolution_buffer.py b/glue/core/fixed_resolution_buffer.py index 126d1253e..5de3c858c 100644 --- a/glue/core/fixed_resolution_buffer.py +++ b/glue/core/fixed_resolution_buffer.py @@ -123,6 +123,8 @@ def compute_fixed_resolution_buffer(data, bounds, target_data=None, target_cid=N dimension, otherwise an error will be raised. """ + print('compute_fixed_resolution_buffer', data.uuid, bounds, target_data.uuid if target_data is not None else None, target_cid.label if target_cid is not None else None, subset_state, broadcast, cache_id) + if target_data is None: target_data = data @@ -143,14 +145,14 @@ def compute_fixed_resolution_buffer(data, bounds, target_data=None, target_cid=N if cache_id is not None: if subset_state is None: - # Use uuid for component ID since otherwise component IDs don't return + # Use uuid for data and component ID since otherwise component IDs don't return # False when comparing two different CIDs (instead they return a subset state). # For bounds we use a special wrapper that can identify wildcards. - current_array_hash = (data, bounds, target_data, target_cid.uuid, broadcast) + current_array_hash = (data.uuid, bounds, target_data.uuid, target_cid.uuid, broadcast) else: - current_array_hash = (data, bounds, target_data, subset_state, broadcast) + current_array_hash = (data.uuid, bounds, target_data.uuid, subset_state, broadcast) - current_pixel_hash = (data, target_data) + current_pixel_hash = (data.uuid, target_data.uuid) if cache_id in ARRAY_CACHE: if ARRAY_CACHE[cache_id]['hash'] == current_array_hash: @@ -158,9 +160,32 @@ def compute_fixed_resolution_buffer(data, bounds, target_data=None, target_cid=N # To save time later, if the pixel cache doesn't match at the level of the # data and target_data, we just reset the cache. - if cache_id in PIXEL_CACHE: - if PIXEL_CACHE[cache_id]['hash'] != current_pixel_hash: - PIXEL_CACHE.pop(cache_id) + # if cache_id in PIXEL_CACHE: + # if PIXEL_CACHE[cache_id]['hash'] != current_pixel_hash: + # PIXEL_CACHE.pop(cache_id) + + # For pixel transformations, we don't necessarily need the cache_id to match, as coordinate + # transformations can be the same for different callers - for instance, layer artists for + # subsets might need the same transformations over all subsets and the same as for the actual + # data. Therefore, instead of extracting the cache_id entry from PIXEL_CACHE, we instead loop + # over PIXEL_CACHE, and for each pixel component ID, we find the first entry that matches the + # hash and the bounds, since that should uniquely identify the transformation. Note that we + # still use cache_id because we still want to make sure we save results from different layers + # to the cache in case any layers get removed or changed. + matching_pixel_cache = {} + for chid in PIXEL_CACHE: + if PIXEL_CACHE[chid]['hash'] == current_pixel_hash: + for ipix, pix in enumerate(data.pixel_component_ids): + if ( + ipix not in matching_pixel_cache and + ipix in PIXEL_CACHE[chid] and + PIXEL_CACHE[chid][ipix]['bounds'] == bounds + ): + matching_pixel_cache[ipix] = PIXEL_CACHE[chid][ipix] + + else: + + matching_pixel_cache = None # Start off by generating arrays of coordinates in the original dataset pixel_coords = [np.linspace(*bound) if isinstance(bound, tuple) else bound for bound in bounds] @@ -179,17 +204,11 @@ def compute_fixed_resolution_buffer(data, bounds, target_data=None, target_cid=N for ipix, pix in enumerate(data.pixel_component_ids): - # At this point, if cache_id is in PIXEL_CACHE, we know that data and - # target_data match so we just check the bounds. Note that the bounds - # include the AnyScalar wildcard for any dimensions that don't impact - # the pixel coordinates here. We do this so that we don't have to - # recompute the pixel coordinates when e.g. slicing through cubes. - - if cache_id in PIXEL_CACHE and ipix in PIXEL_CACHE[cache_id] and PIXEL_CACHE[cache_id][ipix]['bounds'] == bounds: + if matching_pixel_cache is not None and ipix in matching_pixel_cache: - translated_coord = PIXEL_CACHE[cache_id][ipix]['translated_coord'] - dimensions = PIXEL_CACHE[cache_id][ipix]['dimensions'] - invalid = PIXEL_CACHE[cache_id][ipix]['invalid'] + translated_coord = matching_pixel_cache[ipix]['translated_coord'] + dimensions = matching_pixel_cache[ipix]['dimensions'] + invalid = matching_pixel_cache[ipix]['invalid'] else: From 285f015f181cc7b853776cb6014a152af5aa7709 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 25 Jul 2024 11:49:13 +0100 Subject: [PATCH 2/3] Remove unused code --- glue/core/fixed_resolution_buffer.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/glue/core/fixed_resolution_buffer.py b/glue/core/fixed_resolution_buffer.py index 5de3c858c..1eea14e36 100644 --- a/glue/core/fixed_resolution_buffer.py +++ b/glue/core/fixed_resolution_buffer.py @@ -158,12 +158,6 @@ def compute_fixed_resolution_buffer(data, bounds, target_data=None, target_cid=N if ARRAY_CACHE[cache_id]['hash'] == current_array_hash: return ARRAY_CACHE[cache_id]['array'] - # To save time later, if the pixel cache doesn't match at the level of the - # data and target_data, we just reset the cache. - # if cache_id in PIXEL_CACHE: - # if PIXEL_CACHE[cache_id]['hash'] != current_pixel_hash: - # PIXEL_CACHE.pop(cache_id) - # For pixel transformations, we don't necessarily need the cache_id to match, as coordinate # transformations can be the same for different callers - for instance, layer artists for # subsets might need the same transformations over all subsets and the same as for the actual From da6b863f4194114c5f85259300ebdbefcf5015fc Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 25 Jul 2024 15:31:21 +0100 Subject: [PATCH 3/3] Remove debug statement Co-authored-by: P. L. Lim <2090236+pllim@users.noreply.github.com> --- glue/core/fixed_resolution_buffer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/glue/core/fixed_resolution_buffer.py b/glue/core/fixed_resolution_buffer.py index 1eea14e36..dc7452ab7 100644 --- a/glue/core/fixed_resolution_buffer.py +++ b/glue/core/fixed_resolution_buffer.py @@ -123,8 +123,6 @@ def compute_fixed_resolution_buffer(data, bounds, target_data=None, target_cid=N dimension, otherwise an error will be raised. """ - print('compute_fixed_resolution_buffer', data.uuid, bounds, target_data.uuid if target_data is not None else None, target_cid.label if target_cid is not None else None, subset_state, broadcast, cache_id) - if target_data is None: target_data = data