From 76dd9b89d6a3845677f7f155b3c89cfbdbee3d6b Mon Sep 17 00:00:00 2001
From: Louis-David Perron <100434291+perronld@users.noreply.github.com>
Date: Wed, 20 Jul 2022 08:52:12 -0400
Subject: [PATCH 1/6] Removed mandatory chunking for local files

---
 finch/processes/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/finch/processes/utils.py b/finch/processes/utils.py
index 5cb59988..fc301825 100644
--- a/finch/processes/utils.py
+++ b/finch/processes/utils.py
@@ -301,6 +301,8 @@ def try_opendap(
     if is_opendap_url(url):
         ds = xr.open_dataset(url, chunks=chunks, decode_times=decode_times)
         logging_function(f"Opened dataset as an OPeNDAP url: {url}")
+        if not chunks:
+            ds = ds.chunk(chunk_dataset(ds, max_size=1000000, chunk_dims=chunk_dims))
     else:
         if url.startswith("http"):
             # Accessing the file property writes it to disk if it's a url
@@ -315,8 +317,6 @@ def try_opendap(
         chunks = dict(time=-1, region=5)
         ds = ds.chunk(chunks)
 
-    if not chunks:
-        ds = ds.chunk(chunk_dataset(ds, max_size=1000000, chunk_dims=chunk_dims))
     return ds
 
 

From 36d57ecac7eeedca3193572b5ab8358d5564f400 Mon Sep 17 00:00:00 2001
From: Louis-David Perron <100434291+perronld@users.noreply.github.com>
Date: Wed, 20 Jul 2022 15:55:55 -0400
Subject: [PATCH 2/6] Added comment in CHANGES.rst

---
 CHANGES.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index 417a9933..cd8c8576 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,10 @@
 Changes
 *******
 
+0.9.3 (unreleased)
+==================
+* Improved subset_grid_point_dataset & subset_bbox_dataset performance when using local files
+
 0.9.2 (2022-07-19)
 ==================
 * Fix Finch unable to startup in the Docker image.

From c2610c73242c85adf0287c723076d2069c014bb3 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault
Date: Fri, 22 Jul 2022 12:47:17 -0400
Subject: [PATCH 3/6] Switch to auto chunking - disable on gridpoint

---
 finch/processes/subset.py |  3 ++-
 finch/processes/utils.py  | 16 +++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/finch/processes/subset.py b/finch/processes/subset.py
index a3884b85..69c134c6 100644
--- a/finch/processes/subset.py
+++ b/finch/processes/subset.py
@@ -83,7 +83,8 @@ def _subset(resource: ComplexInput):
 
     # if not subsetting by time, it's not necessary to decode times
     time_subset = start_date is not None or end_date is not None
-    dataset = try_opendap(resource, decode_times=time_subset)
+    # No chunking needed for a single gridpoint.
+    dataset = try_opendap(resource, chunks=False, decode_times=time_subset)
 
     with lock:
         count += 1

diff --git a/finch/processes/utils.py b/finch/processes/utils.py
index fc301825..5a171587 100644
--- a/finch/processes/utils.py
+++ b/finch/processes/utils.py
@@ -286,23 +286,24 @@ def drs_filename(ds: xr.Dataset, variable: str = None):
 def try_opendap(
     input: ComplexInput,
     *,
-    chunks=None,
+    chunks='auto',
     decode_times=True,
     chunk_dims=None,
     logging_function=lambda message: None,
 ) -> xr.Dataset:
     """Try to open the file as an OPeNDAP url and chunk it.
 
-    If OPeNDAP fails, access the file directly.
+    By default, chunks are to be determined by xarray/dask.
+    If `chunks=None` or `chunk_dims` is given, finch rechunks the dataset according to 
+    the logic of `chunk_dataset`.
+    Pass `chunks=False` to disable dask entirely on this dataset.
""" url = input.url logging_function(f"Try opening DAP link {url}") if is_opendap_url(url): - ds = xr.open_dataset(url, chunks=chunks, decode_times=decode_times) + ds = xr.open_dataset(url, chunks=chunks or None, decode_times=decode_times) logging_function(f"Opened dataset as an OPeNDAP url: {url}") - if not chunks: - ds = ds.chunk(chunk_dataset(ds, max_size=1000000, chunk_dims=chunk_dims)) else: if url.startswith("http"): # Accessing the file property writes it to disk if it's a url @@ -310,13 +311,14 @@ def try_opendap( else: logging_function(f"Opening as local file: {input.file}") - ds = xr.open_dataset(input.file, chunks=chunks, decode_times=decode_times) + ds = xr.open_dataset(input.file, chunks=chunks or None, decode_times=decode_times) # To handle large number of grid cells (50+) in subsetted data if "region" in ds.dims and "time" in ds.dims: chunks = dict(time=-1, region=5) ds = ds.chunk(chunks) - + elif chunks is None or chunk_dims is not None: + ds = ds.chunk(chunk_dataset(ds, max_size=1000000, chunk_dims=chunk_dims)) return ds From 1dd4100e6dff31864f2b7f982620e87dccdb94d5 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 22 Jul 2022 12:49:24 -0400 Subject: [PATCH 4/6] lint --- finch/processes/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finch/processes/utils.py b/finch/processes/utils.py index 5a171587..576888d7 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -294,7 +294,7 @@ def try_opendap( """Try to open the file as an OPeNDAP url and chunk it. By default, chunks are to be determined by xarray/dask. - If `chunks=None` or `chunks_dims` is given, finch rechunks the dataset according to + If `chunks=None` or `chunks_dims` is given, finch rechunks the dataset according to the logic of `chunk_dataset`. Pass `chunks=False` to disable dask entirely on this dataset. 
""" From 9afd3a6d9446f289e2f9f919ff9cb590c6297906 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 13 Oct 2022 14:36:02 -0400 Subject: [PATCH 5/6] Avoid failing with object vars - rechunk for sdba --- finch/processes/utils.py | 15 +++++++++++++-- finch/processes/wps_sdba.py | 4 ++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/finch/processes/utils.py b/finch/processes/utils.py index 576888d7..b51185ac 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -302,7 +302,7 @@ def try_opendap( logging_function(f"Try opening DAP link {url}") if is_opendap_url(url): - ds = xr.open_dataset(url, chunks=chunks or None, decode_times=decode_times) + path = url logging_function(f"Opened dataset as an OPeNDAP url: {url}") else: if url.startswith("http"): @@ -310,8 +310,19 @@ def try_opendap( logging_function(f"Downloading dataset for url: {url}") else: logging_function(f"Opening as local file: {input.file}") + path = input.file - ds = xr.open_dataset(input.file, chunks=chunks or None, decode_times=decode_times) + try: + # Try to open the dataset + ds = xr.open_dataset(path, chunks=chunks or None, decode_times=decode_times) + except NotImplementedError: + if chunks == 'auto': + # Some dtypes are not compatible with auto chunking (object, so unbounded strings) + logging_function(f"xarray auto-chunking failed, opening with no chunks and inferring chunks ourselves.") + chunks = None + ds = xr.open_dataset(path, chunks=None, decode_times=decode_times) + else: + raise # To handle large number of grid cells (50+) in subsetted data if "region" in ds.dims and "time" in ds.dims: diff --git a/finch/processes/wps_sdba.py b/finch/processes/wps_sdba.py index f8b5c215..9ebf47b8 100644 --- a/finch/processes/wps_sdba.py +++ b/finch/processes/wps_sdba.py @@ -163,8 +163,8 @@ def _log(message, percentage): ds = try_opendap(request.inputs[key][0]) name = variable or list(ds.data_vars)[0] - # Force calendar to noleap - res[key] = convert_calendar(ds[name], "noleap") + # Force calendar to noleap and rechunk + res[key] = convert_calendar(ds[name], "noleap").chunk({'time': -1}) elif key in group_args: group[key] = single_input_or_none(request.inputs, key) From 203fa93cba2266d6f942bee88fe57415b197a561 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 13 Oct 2022 14:37:25 -0400 Subject: [PATCH 6/6] remove unused f --- finch/processes/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finch/processes/utils.py b/finch/processes/utils.py index b51185ac..9fdaff9c 100644 --- a/finch/processes/utils.py +++ b/finch/processes/utils.py @@ -318,7 +318,7 @@ def try_opendap( except NotImplementedError: if chunks == 'auto': # Some dtypes are not compatible with auto chunking (object, so unbounded strings) - logging_function(f"xarray auto-chunking failed, opening with no chunks and inferring chunks ourselves.") + logging_function("xarray auto-chunking failed, opening with no chunks and inferring chunks ourselves.") chunks = None ds = xr.open_dataset(path, chunks=None, decode_times=decode_times) else: