minor performance improvements and upgraded hdf5tools

tethys-ts · Nov 5, 2022 · d9ba7e3 · d9ba7e3
1 parent 261e073
commit d9ba7e3
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 15 deletions.
diff --git a/conda/meta.yaml b/conda/meta.yaml
@@ -1,5 +1,5 @@
 {% set name = "tethysts" %}
-{% set version = "4.5.5" %}
+{% set version = "4.5.6" %}
 # {% set sha256 = "ae2cc83fb5a75e8dc3e1b2c2137deea412c8a4c7c9acca52bf4ec59de52a80c9" %}
 
 # sha256 is the preferred checksum -- you can get it for a file with:
@@ -44,7 +44,7 @@ requirements:
     - requests
     - shapely
     - tethys-data-models >=0.4.11
-    - hdf5tools >=0.1.3
+    - hdf5tools >=0.1.4
     - s3tethys >=0.0.4
 
 test:

diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 name = 'tethysts'
 main_package = 'tethysts'
 datasets = 'datasets/time_series'
-version = '4.5.5'
+version = '4.5.6'
 descrip = 'tethys time series S3 extraction'
 
 # The below code is for readthedocs. To have sphinx/readthedocs interact with
@@ -19,7 +19,7 @@
 if os.environ.get('READTHEDOCS', False) == 'True':
     INSTALL_REQUIRES = []
 else:
-    INSTALL_REQUIRES = ['zstandard', 'pandas', 'xarray', 'scipy', 'orjson', 'requests', 'shapely', 'tethys-data-models>=0.4.11', 'hdf5tools>=0.1.3', 's3tethys>=0.0.4']
+    INSTALL_REQUIRES = ['zstandard', 'pandas', 'xarray', 'scipy', 'orjson', 'requests', 'shapely', 'tethys-data-models>=0.4.11', 'hdf5tools>=0.1.4', 's3tethys>=0.0.4']
 
 # Get the long description from the README file
 with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:

diff --git a/tethysts/tests/utest_tethysts.py b/tethysts/tests/utest_tethysts.py
@@ -233,8 +233,8 @@
 remote = {'bucket': 'tasman-env', 'public_url': 'https://b2.tethys-ts.xyz/file', 'version': 4}
 remote = {'bucket': 'noaa-nwm', 'public_url': 'https://b2.tethys-ts.xyz/file', 'version': 4}
 
-# cache = '/media/nvme1/cache/tethys'
-cache = '/home/mike/cache/tethys'
+cache = '/media/nvme1/cache/tethys'
+# cache = '/home/mike/cache/tethys'
 
 dataset_id = '7751c5f1bf47867fb109d7eb'
 dataset_id = '0b2bd62cc42f3096136f11e9'
@@ -293,12 +293,12 @@
 station_ids = [s['station_id'] for s in stns1 if ref in s['ref']]
 
 results1 = self.get_results(dataset_id, station_ids, heights=None)
-results1 = self.get_results(dataset_id, station_ids, heights=None, output_path='/home/mike/cache/tethys/test.h5', compression='zstd')
+results1 = self.get_results(dataset_id, station_ids, heights=None, output_path='/media/nvme1/cache/tethys/test.h5', compression='zstd')
 
 results1 = self.get_results(dataset_id, station_ids, heights=[10, 20, 30, 80], version_date=version_date)
 
 results1 = self.get_results(dataset_id, station_ids, heights=[10])
-results1 = self.get_results(dataset_id, station_ids, heights=[10], output_path='/home/mike/cache/tethys/test.h5', compression='zstd')
+results1 = self.get_results(dataset_id, station_ids, heights=[10], output_path='/media/nvme1/cache/tethys/test.h5', compression='zstd')
 
 results1 = self.get_results(dataset_id, station_ids, heights=[10], from_date='2020-04-01')
 
@@ -450,9 +450,9 @@
         # If stations are found, then get the associated results
         if stn_ids:
             results = ts.get_results(dataset_id, stn_ids, heights=heights)
-    
+
             output_file = '{ds}_{min_x}_{min_y}_{max_x}_{max_y}.nc'.format(ds=dataset_id, min_x=min_x, min_y=min_y, max_x=x, max_y=y)
-    
+
             # Save the file to netcdf (xarray uses an excessive amount of memory when saving to netcdf, so be careful...)
             results.to_netcdf(os.path.join(output_path, output_file))
             results.close()

diff --git a/tethysts/utils.py b/tethysts/utils.py
@@ -746,7 +746,7 @@ def download_results(chunk: dict, bucket: str, s3: botocore.client.BaseClient =
             chunk_path.parent.mkdir(parents=True, exist_ok=True)
 
             if chunk['key'].endswith('.zst'):
-                data = xr.load_dataset(s3tethys.decompress_stream_to_object(file_obj, 'zstd'))
+                data = xr.load_dataset(s3tethys.decompress_stream_to_object(io.BytesIO(file_obj.read()), 'zstd'))
                 H5(data).sel(exclude_coords=['station_geometry', 'chunk_date']).to_hdf5(chunk_path)
                 data.close()
                 del data
@@ -760,16 +760,18 @@ def download_results(chunk: dict, bucket: str, s3: botocore.client.BaseClient =
             return file_obj
 
         if chunk['key'].endswith('.zst'):
-            file_obj = s3tethys.decompress_stream_to_object(file_obj, 'zstd')
+            file_obj = s3tethys.decompress_stream_to_object(io.BytesIO(file_obj.read()), 'zstd')
             data = xr.load_dataset(file_obj.read(), engine='scipy')
         else:
-            data = xr.load_dataset(io.BytesIO(file_obj.read()), engine='h5netcdf')
+            data = io.BytesIO(file_obj.read())
 
-        h1 = H5(data).sel(exclude_coords=['station_geometry', 'chunk_date'])
+        h1 = H5(data)
         data_obj = io.BytesIO()
+        h1 = result_filters(h1)
         h1.to_hdf5(data_obj)
 
-        data.close()
+        if isinstance(data, xr.Dataset):
+            data.close()
         del data
         del h1