Skip to content

Commit

Permalink
minor performance improvements and upgraded hdf5tools
Browse files Browse the repository at this point in the history
  • Loading branch information
mullenkamp committed Nov 5, 2022
1 parent 261e073 commit d9ba7e3
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 15 deletions.
4 changes: 2 additions & 2 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "tethysts" %}
{% set version = "4.5.5" %}
{% set version = "4.5.6" %}
# {% set sha256 = "ae2cc83fb5a75e8dc3e1b2c2137deea412c8a4c7c9acca52bf4ec59de52a80c9" %}

# sha256 is the preferred checksum -- you can get it for a file with:
Expand Down Expand Up @@ -44,7 +44,7 @@ requirements:
- requests
- shapely
- tethys-data-models >=0.4.11
- hdf5tools >=0.1.3
- hdf5tools >=0.1.4
- s3tethys >=0.0.4

test:
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
name = 'tethysts'
main_package = 'tethysts'
datasets = 'datasets/time_series'
version = '4.5.5'
version = '4.5.6'
descrip = 'tethys time series S3 extraction'

# The below code is for readthedocs. To have sphinx/readthedocs interact with
Expand All @@ -19,7 +19,7 @@
if os.environ.get('READTHEDOCS', False) == 'True':
INSTALL_REQUIRES = []
else:
INSTALL_REQUIRES = ['zstandard', 'pandas', 'xarray', 'scipy', 'orjson', 'requests', 'shapely', 'tethys-data-models>=0.4.11', 'hdf5tools>=0.1.3', 's3tethys>=0.0.4']
INSTALL_REQUIRES = ['zstandard', 'pandas', 'xarray', 'scipy', 'orjson', 'requests', 'shapely', 'tethys-data-models>=0.4.11', 'hdf5tools>=0.1.4', 's3tethys>=0.0.4']

# Get the long description from the README file
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
Expand Down
12 changes: 6 additions & 6 deletions tethysts/tests/utest_tethysts.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,8 @@
remote = {'bucket': 'tasman-env', 'public_url': 'https://b2.tethys-ts.xyz/file', 'version': 4}
remote = {'bucket': 'noaa-nwm', 'public_url': 'https://b2.tethys-ts.xyz/file', 'version': 4}

# cache = '/media/nvme1/cache/tethys'
cache = '/home/mike/cache/tethys'
cache = '/media/nvme1/cache/tethys'
# cache = '/home/mike/cache/tethys'

dataset_id = '7751c5f1bf47867fb109d7eb'
dataset_id = '0b2bd62cc42f3096136f11e9'
Expand Down Expand Up @@ -293,12 +293,12 @@
station_ids = [s['station_id'] for s in stns1 if ref in s['ref']]

results1 = self.get_results(dataset_id, station_ids, heights=None)
results1 = self.get_results(dataset_id, station_ids, heights=None, output_path='/home/mike/cache/tethys/test.h5', compression='zstd')
results1 = self.get_results(dataset_id, station_ids, heights=None, output_path='/media/nvme1/cache/tethys/test.h5', compression='zstd')

results1 = self.get_results(dataset_id, station_ids, heights=[10, 20, 30, 80], version_date=version_date)

results1 = self.get_results(dataset_id, station_ids, heights=[10])
results1 = self.get_results(dataset_id, station_ids, heights=[10], output_path='/home/mike/cache/tethys/test.h5', compression='zstd')
results1 = self.get_results(dataset_id, station_ids, heights=[10], output_path='/media/nvme1/cache/tethys/test.h5', compression='zstd')

results1 = self.get_results(dataset_id, station_ids, heights=[10], from_date='2020-04-01')

Expand Down Expand Up @@ -450,9 +450,9 @@
# If stations are found, then get the associated results
if stn_ids:
results = ts.get_results(dataset_id, stn_ids, heights=heights)

output_file = '{ds}_{min_x}_{min_y}_{max_x}_{max_y}.nc'.format(ds=dataset_id, min_x=min_x, min_y=min_y, max_x=x, max_y=y)

# Save the file to netcdf (xarray uses an excessive amount of memory when saving to netcdf, so be careful...)
results.to_netcdf(os.path.join(output_path, output_file))
results.close()
Expand Down
12 changes: 7 additions & 5 deletions tethysts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ def download_results(chunk: dict, bucket: str, s3: botocore.client.BaseClient =
chunk_path.parent.mkdir(parents=True, exist_ok=True)

if chunk['key'].endswith('.zst'):
data = xr.load_dataset(s3tethys.decompress_stream_to_object(file_obj, 'zstd'))
data = xr.load_dataset(s3tethys.decompress_stream_to_object(io.BytesIO(file_obj.read()), 'zstd'))
H5(data).sel(exclude_coords=['station_geometry', 'chunk_date']).to_hdf5(chunk_path)
data.close()
del data
Expand All @@ -760,16 +760,18 @@ def download_results(chunk: dict, bucket: str, s3: botocore.client.BaseClient =
return file_obj

if chunk['key'].endswith('.zst'):
file_obj = s3tethys.decompress_stream_to_object(file_obj, 'zstd')
file_obj = s3tethys.decompress_stream_to_object(io.BytesIO(file_obj.read()), 'zstd')
data = xr.load_dataset(file_obj.read(), engine='scipy')
else:
data = xr.load_dataset(io.BytesIO(file_obj.read()), engine='h5netcdf')
data = io.BytesIO(file_obj.read())

h1 = H5(data).sel(exclude_coords=['station_geometry', 'chunk_date'])
h1 = H5(data)
data_obj = io.BytesIO()
h1 = result_filters(h1)
h1.to_hdf5(data_obj)

data.close()
if isinstance(data, xr.Dataset):
data.close()
del data
del h1

Expand Down

0 comments on commit d9ba7e3

Please sign in to comment.