From cd2f3c5f9fe4bca166d7801930c5438d0d5de3e6 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Tue, 4 Jun 2024 14:41:02 -0700 Subject: [PATCH 01/12] Add h5netcdf pkg dependency Hoping to improve the reliability of the make_ww3_current_file and make_ww3_wind_file workers by changing them to use h5netcdf for dataset reads. The change results in version updates for multiple dependencies in requirements .txt, and inclusion of the h5netcdf package across multiple environment files and the pyproject.toml file. --- envs/environment-dev.yaml | 1 + envs/environment-fig-dev.yaml | 1 + envs/environment-linkcheck.yaml | 1 + envs/environment-prod.yaml | 1 + envs/requirements.txt | 27 +++++++++++++++------------ pyproject.toml | 1 + 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/envs/environment-dev.yaml b/envs/environment-dev.yaml index 691f2f27..02174175 100644 --- a/envs/environment-dev.yaml +++ b/envs/environment-dev.yaml @@ -41,6 +41,7 @@ dependencies: - geopandas - gitpython - gsw + - h5netcdf - httpx - lxml - mako diff --git a/envs/environment-fig-dev.yaml b/envs/environment-fig-dev.yaml index 337e0f1d..70dca8c3 100644 --- a/envs/environment-fig-dev.yaml +++ b/envs/environment-fig-dev.yaml @@ -34,6 +34,7 @@ dependencies: - geopandas - gitpython - gsw + - h5netcdf - httpx - lxml - mako diff --git a/envs/environment-linkcheck.yaml b/envs/environment-linkcheck.yaml index 2fe794c8..c9a5df8f 100644 --- a/envs/environment-linkcheck.yaml +++ b/envs/environment-linkcheck.yaml @@ -30,6 +30,7 @@ dependencies: - gitpython - gsw - httpx + - h5netcdf - lxml - mako - matplotlib diff --git a/envs/environment-prod.yaml b/envs/environment-prod.yaml index 0f8df8a6..eb878f60 100644 --- a/envs/environment-prod.yaml +++ b/envs/environment-prod.yaml @@ -41,6 +41,7 @@ dependencies: - geopandas - gitpython - gsw + - h5netcdf - httpx - lxml - mako diff --git a/envs/requirements.txt b/envs/requirements.txt index 99cfa798..ce42c66a 100644 --- a/envs/requirements.txt +++ b/envs/requirements.txt @@ -23,10 +23,11 @@ bokeh==3.4.1 Bottleneck==1.3.8 branca==0.7.2 Brotli==1.1.0 +cached-property==1.5.2 Cartopy==0.23.0 certifi==2024.2.2 cffi==1.16.0 -cfgrib==0.9.11.0 +cfgrib==0.9.12.0 cfgv==3.3.1 cftime==1.6.3 charset-normalizer==3.3.2 @@ -45,10 +46,10 @@ coverage==7.5.1 cryptography==42.0.7 cycler==0.12.1 cytoolz==0.12.3 -dask==2024.5.0 -dask-expr==1.1.0 +dask==2024.5.1 +dask-expr==1.1.1 distlib==0.3.8 -distributed==2024.5.0 +distributed==2024.5.1 docutils==0.20.1 eccodes==1.7.0 editables==0.5 @@ -60,7 +61,7 @@ feedgen==1.0.0 filelock==3.14.0 findlibs==0.0.5 fiona==1.9.6 -flox==0.9.7 +flox==0.9.8 folium==0.16.0 fonttools==4.51.0 fsspec==2024.5.0 @@ -72,7 +73,9 @@ GitPython==3.1.43 gsw==3.6.18 h11==0.14.0 h2==4.1.0 -hatch==1.11.0 +h5netcdf==1.3.0 +h5py==3.11.0 +hatch==1.12.0 hatchling==1.24.2 hpack==4.0.0 httpcore==1.0.5 @@ -148,7 +151,7 @@ PyQt5==5.15.9 PyQt5-sip==12.12.2 pyshp==2.3.1 PySocks==1.7.1 -pytest==8.2.0 +pytest==8.2.1 pytest-cov==5.0.0 pytest_httpx==0.29.0 pytest-randomly==3.15.0 @@ -159,7 +162,7 @@ pytz==2024.1 PyYAML==6.0.1 pyzmq==26.0.3 rasterio==1.3.10 -requests==2.32.0 +requests==2.32.3 requests-file==2.0.0 requests-toolbelt==1.0.0 retrying==1.3.3 @@ -167,10 +170,10 @@ rich==13.7.1 Rtree==1.2.0 schedule==1.2.1 scikit-learn==1.4.2 -scipy==1.13.0 +scipy==1.13.1 scour==0.38.2 SecretStorage==3.3.3 -sentry-sdk==2.2.0 +sentry-sdk==2.3.1 setuptools==69.5.1 shapely==2.0.4 shellingham==1.5.4 @@ -193,7 +196,7 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.7 sphinxcontrib-serializinghtml==1.1.10 stevedore==5.2.0 -structlog==24.1.0 +structlog==24.2.0 supervisor==4.2.5 sysrsync==1.1.1 tables==3.9.2 @@ -217,7 +220,7 @@ userpath==1.7.0 utm==0.7.0 verboselogs==1.7 virtualenv==20.26.2 -watchdog==4.0.0 +watchdog==4.0.1 wcwidth==0.2.13 wheel==0.43.0 xarray==2024.5.0 diff --git a/pyproject.toml b/pyproject.toml index a362344b..df22af17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ dependencies = [ "geopandas", "gitpython", "gsw", + "h5netcdf", "httpx", "lxml", "mako", From 144065baa82bf5f38532a0bde157c8c4539adba0 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Tue, 4 Jun 2024 15:00:03 -0700 Subject: [PATCH 02/12] Update make_ww3_*_file workers main() function docstrings Removed not informative "Set up and run the worker." line at the beginning. re: issue #121 --- nowcast/workers/make_ww3_current_file.py | 4 +--- nowcast/workers/make_ww3_wind_file.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/nowcast/workers/make_ww3_current_file.py b/nowcast/workers/make_ww3_current_file.py index 05b0f909..4ecab969 100644 --- a/nowcast/workers/make_ww3_current_file.py +++ b/nowcast/workers/make_ww3_current_file.py @@ -35,9 +35,7 @@ def main(): - """Set up and run the worker. - - For command-line usage see: + """For command-line usage see: :command:`python -m nowcast.workers.make_ww3_current_file --help` """ diff --git a/nowcast/workers/make_ww3_wind_file.py b/nowcast/workers/make_ww3_wind_file.py index 25345f64..00223501 100644 --- a/nowcast/workers/make_ww3_wind_file.py +++ b/nowcast/workers/make_ww3_wind_file.py @@ -28,9 +28,7 @@ def main(): - """Set up and run the worker. - - For command-line usage see: + """For command-line usage see: :command:`python -m nowcast.workers.make_ww3_wind_file --help` """ From eb5aeaf9e67cd927eeca0a01ac91084c865da109 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 09:33:03 -0700 Subject: [PATCH 03/12] Update wind file generation re: unneeded variables Refactored the WWatch3 wind file generation to remove unnecessary variables and improved related tests. The code now drops more unneeded variables to reduce the memory load. The corresponding tests are also enhanced to accurately represent these changes. --- nowcast/workers/make_ww3_wind_file.py | 15 +++++-- tests/workers/test_make_ww3_wind_file.py | 54 ++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/nowcast/workers/make_ww3_wind_file.py b/nowcast/workers/make_ww3_wind_file.py index 00223501..23cfa07c 100644 --- a/nowcast/workers/make_ww3_wind_file.py +++ b/nowcast/workers/make_ww3_wind_file.py @@ -108,10 +108,6 @@ def make_ww3_wind_file(parsed_args, config, *args): dest_dir = Path(config["wave forecasts"]["run prep dir"], "wind") filepath_tmpl = config["wave forecasts"]["wind file template"] nc_filepath = dest_dir / filepath_tmpl.format(yyyymmdd=run_date.format("YYYYMMDD")) - with xarray.open_dataset(datasets[0]) as lats_lons: - lats = lats_lons.nav_lat - lons = lats_lons.nav_lon - logger.debug(f"lats and lons from: {datasets[0]}") drop_vars = { "LHTFL_surface", "PRATE_surface", @@ -122,7 +118,18 @@ def make_ww3_wind_file(parsed_args, config, *args): "solar", "tair", "therm_rad", + "u_wind", + "v_wind", } + with xarray.open_dataset(datasets[0], drop_variables=drop_vars) as lats_lons: + lats = lats_lons.nav_lat + lons = lats_lons.nav_lon + logger.debug(f"lats and lons from: {datasets[0]}") + drop_vars = drop_vars.union({ + "nav_lon", + "nav_lat", + }) + drop_vars = drop_vars.difference({"u_wind", "v_wind"}) chunks = { "time_counter": 1, "y": 230, diff --git a/tests/workers/test_make_ww3_wind_file.py b/tests/workers/test_make_ww3_wind_file.py index f6674573..bfdafc34 100644 --- a/tests/workers/test_make_ww3_wind_file.py +++ b/tests/workers/test_make_ww3_wind_file.py @@ -223,8 +223,22 @@ def test_nowcast_dataset( run_date=arrow.get("2019-03-24"), ) make_ww3_wind_file.make_ww3_wind_file(parsed_args, config) + drop_vars = { + "LHTFL_surface", + "PRATE_surface", + "RH_2maboveground", + "atmpres", + "precip", + "qair", + "solar", + "tair", + "therm_rad", + "u_wind", + "v_wind", + } m_open_dataset.assert_called_once_with( - Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/ops_y2019m03d24.nc") + Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/ops_y2019m03d24.nc"), + drop_variables=drop_vars, ) chunks = { "time_counter": 1, @@ -241,6 +255,8 @@ def test_nowcast_dataset( "solar", "tair", "therm_rad", + "nav_lon", + "nav_lat", } m_open_mfdataset.assert_called_once_with( [Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/ops_y2019m03d24.nc")], @@ -260,8 +276,22 @@ def test_forecast_datasets( run_date=arrow.get("2017-04-07"), ) make_ww3_wind_file.make_ww3_wind_file(parsed_args, config) + drop_vars = { + "LHTFL_surface", + "PRATE_surface", + "RH_2maboveground", + "atmpres", + "precip", + "qair", + "solar", + "tair", + "therm_rad", + "u_wind", + "v_wind", + } m_open_dataset.assert_called_once_with( - Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/ops_y2017m04d07.nc") + Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/ops_y2017m04d07.nc"), + drop_variables=drop_vars, ) chunks = { "time_counter": 1, @@ -278,6 +308,8 @@ def test_forecast_datasets( "solar", "tair", "therm_rad", + "nav_lon", + "nav_lat", } m_open_mfdataset.assert_called_once_with( [ @@ -301,8 +333,22 @@ def test_forecast2_datasets( run_date=arrow.get("2017-04-07"), ) make_ww3_wind_file.make_ww3_wind_file(parsed_args, config) + drop_vars = { + "LHTFL_surface", + "PRATE_surface", + "RH_2maboveground", + "atmpres", + "precip", + "qair", + "solar", + "tair", + "therm_rad", + "u_wind", + "v_wind", + } m_open_dataset.assert_called_once_with( - Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/fcst/ops_y2017m04d07.nc") + Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/fcst/ops_y2017m04d07.nc"), + drop_variables=drop_vars, ) chunks = { "time_counter": 1, @@ -319,6 +365,8 @@ def test_forecast2_datasets( "solar", "tair", "therm_rad", + "nav_lon", + "nav_lat", } m_open_mfdataset.assert_called_once_with( [ From 572c7b043488c0213eec91ae82f30f3302936b2e Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:00:00 -0700 Subject: [PATCH 04/12] Switch to h5netcdf engine for xarray dataset reads The h5netcdf engine has been set as the engine for opening datasets in 'make_ww3_wind_file.py'. The intent is to avoid the netcdf4 package thread-safety issues. The corresponding test cases in 'test_make_ww3_wind_file.py' have also been updated to reflect this change. --- nowcast/workers/make_ww3_wind_file.py | 3 ++- tests/workers/test_make_ww3_wind_file.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/nowcast/workers/make_ww3_wind_file.py b/nowcast/workers/make_ww3_wind_file.py index 23cfa07c..a0577224 100644 --- a/nowcast/workers/make_ww3_wind_file.py +++ b/nowcast/workers/make_ww3_wind_file.py @@ -121,7 +121,7 @@ def make_ww3_wind_file(parsed_args, config, *args): "u_wind", "v_wind", } - with xarray.open_dataset(datasets[0], drop_variables=drop_vars) as lats_lons: + with xarray.open_dataset(datasets[0], drop_variables=drop_vars, engine = "h5netcdf") as lats_lons: lats = lats_lons.nav_lat lons = lats_lons.nav_lon logger.debug(f"lats and lons from: {datasets[0]}") @@ -142,6 +142,7 @@ def make_ww3_wind_file(parsed_args, config, *args): coords="minimal", data_vars="minimal", drop_variables=drop_vars, + engine="h5netcdf", ) as hrdps: ds = _create_dataset( hrdps.time_counter, lats, lons, hrdps.u_wind, hrdps.v_wind, datasets diff --git a/tests/workers/test_make_ww3_wind_file.py b/tests/workers/test_make_ww3_wind_file.py index bfdafc34..fc6a6a2d 100644 --- a/tests/workers/test_make_ww3_wind_file.py +++ b/tests/workers/test_make_ww3_wind_file.py @@ -239,6 +239,7 @@ def test_nowcast_dataset( m_open_dataset.assert_called_once_with( Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/ops_y2019m03d24.nc"), drop_variables=drop_vars, + engine="h5netcdf", ) chunks = { "time_counter": 1, @@ -265,6 +266,7 @@ def test_nowcast_dataset( coords="minimal", data_vars="minimal", drop_variables=drop_vars, + engine="h5netcdf", ) def test_forecast_datasets( @@ -292,6 +294,7 @@ def test_forecast_datasets( m_open_dataset.assert_called_once_with( Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/ops_y2017m04d07.nc"), drop_variables=drop_vars, + engine="h5netcdf", ) chunks = { "time_counter": 1, @@ -322,6 +325,7 @@ def test_forecast_datasets( coords="minimal", data_vars="minimal", drop_variables=drop_vars, + engine="h5netcdf" ) def test_forecast2_datasets( @@ -349,6 +353,7 @@ def test_forecast2_datasets( m_open_dataset.assert_called_once_with( Path("/nemoShare/MEOPAR/GEM2.5/ops/NEMO-atmos/fcst/ops_y2017m04d07.nc"), drop_variables=drop_vars, + engine="h5netcdf", ) chunks = { "time_counter": 1, @@ -379,4 +384,5 @@ def test_forecast2_datasets( coords="minimal", data_vars="minimal", drop_variables=drop_vars, + engine="h5netcdf" ) From 8d4c9282575d488f771e835b3783c76f6f86d40a Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:02:34 -0700 Subject: [PATCH 05/12] Increase wind time_counter chunk size from 1 to 24 The chunk size for the time_counter variable in the make_ww3_wind_file worker and corresponding tests has been increased. This change is anticipated to improve efficiency by processing data in larger batches. --- nowcast/workers/make_ww3_wind_file.py | 2 +- tests/workers/test_make_ww3_wind_file.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nowcast/workers/make_ww3_wind_file.py b/nowcast/workers/make_ww3_wind_file.py index a0577224..59cac7bf 100644 --- a/nowcast/workers/make_ww3_wind_file.py +++ b/nowcast/workers/make_ww3_wind_file.py @@ -131,7 +131,7 @@ def make_ww3_wind_file(parsed_args, config, *args): }) drop_vars = drop_vars.difference({"u_wind", "v_wind"}) chunks = { - "time_counter": 1, + "time_counter": 24, "y": 230, "x": 190, } diff --git a/tests/workers/test_make_ww3_wind_file.py b/tests/workers/test_make_ww3_wind_file.py index fc6a6a2d..e6bd0b66 100644 --- a/tests/workers/test_make_ww3_wind_file.py +++ b/tests/workers/test_make_ww3_wind_file.py @@ -242,7 +242,7 @@ def test_nowcast_dataset( engine="h5netcdf", ) chunks = { - "time_counter": 1, + "time_counter": 24, "y": 230, "x": 190, } @@ -297,7 +297,7 @@ def test_forecast_datasets( engine="h5netcdf", ) chunks = { - "time_counter": 1, + "time_counter": 24, "y": 230, "x": 190, } @@ -356,7 +356,7 @@ def test_forecast2_datasets( engine="h5netcdf", ) chunks = { - "time_counter": 1, + "time_counter": 24, "y": 230, "x": 190, } From 6e0eeac8de4c8a64baa64e332638e361118e89c7 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:10:36 -0700 Subject: [PATCH 06/12] Use dask processes for wwatch3 wind file generation The wind file creation process for the WWatch3 model has been updated to use processes rather than the default threads for the dask scheduler. Processes have been found to be more reliable for dask operations on the types of workloads we use in SalishSeaCast. --- nowcast/workers/make_ww3_wind_file.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nowcast/workers/make_ww3_wind_file.py b/nowcast/workers/make_ww3_wind_file.py index 59cac7bf..6c5c8319 100644 --- a/nowcast/workers/make_ww3_wind_file.py +++ b/nowcast/workers/make_ww3_wind_file.py @@ -149,6 +149,11 @@ def make_ww3_wind_file(parsed_args, config, *args): ) logger.debug("created winds dataset") ds.to_netcdf(os.fspath(nc_filepath)) + dask_scheduler = { + "scheduler": "processes", + "max_workers": 8, + } + ds.compute(**dask_scheduler) logger.debug(f"stored wind forcing file: {nc_filepath}") checklist = {run_type: os.fspath(nc_filepath)} return checklist From a5a1c91a5cd66d860cabb8efe35664114cd6049f Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:14:01 -0700 Subject: [PATCH 07/12] Use netcdf4 to save dataset in make_ww3_wind_file Explicitly use netcdf4 as the engine for dataset writing. This avoids incompatibilities in the resulting file that arise if it is written using h5netcdf. --- nowcast/workers/make_ww3_wind_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nowcast/workers/make_ww3_wind_file.py b/nowcast/workers/make_ww3_wind_file.py index 6c5c8319..c5c4bb2f 100644 --- a/nowcast/workers/make_ww3_wind_file.py +++ b/nowcast/workers/make_ww3_wind_file.py @@ -148,12 +148,12 @@ def make_ww3_wind_file(parsed_args, config, *args): hrdps.time_counter, lats, lons, hrdps.u_wind, hrdps.v_wind, datasets ) logger.debug("created winds dataset") - ds.to_netcdf(os.fspath(nc_filepath)) dask_scheduler = { "scheduler": "processes", "max_workers": 8, } ds.compute(**dask_scheduler) + ds.to_netcdf(nc_filepath, engine="netcdf4") logger.debug(f"stored wind forcing file: {nc_filepath}") checklist = {run_type: os.fspath(nc_filepath)} return checklist From b08a478d9659589a7452d897676e71f89db3fd4e Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:20:29 -0700 Subject: [PATCH 08/12] Update make_ww3_current_file re: unneeded variables Refactored the WWatch3 current file generation to remove unnecessary variables and improved related tests. The code now drops more unneeded variables to reduce the memory load. The corresponding tests are also enhanced to accurately represent these changes. --- nowcast/workers/make_ww3_current_file.py | 12 +++++++++--- tests/workers/test_make_ww3_current_file.py | 14 +++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/nowcast/workers/make_ww3_current_file.py b/nowcast/workers/make_ww3_current_file.py index 4ecab969..327a3cc7 100644 --- a/nowcast/workers/make_ww3_current_file.py +++ b/nowcast/workers/make_ww3_current_file.py @@ -106,7 +106,14 @@ def make_ww3_current_file(parsed_args, config, *args): datasets = _calc_forecast2_datasets( run_date, nemo_dir, nemo_file_tmpl, dest_dir ) - with xarray.open_dataset(mesh_mask) as grid: + drop_vars = { + 'gphiu', 'vmask', 'gdept_0', 'gdepw_0', 'umask', 'gphif', 'e3v_0', 'time_counter', + 'isfdraft', 'glamu', 'e1f', 'vmaskutil', 'mbathy', 'e2t', 'e2u', 'e3u_0', 'ff', 'gdept_1d', + 'gphit', 'e3w_0', 'e1u', 'e1t', 'e2v', 'fmaskutil', 'tmaskutil', 'gdepv', 'misf', 'gphiv', + 'e3t_1d', 'fmask', 'tmask', 'e3t_0', 'gdepw_1d', 'gdepu', 'glamt', 'glamf', + 'e3w_1d', 'e1v', 'umaskutil', 'glamv', 'e2f', + } + with xarray.open_dataset(mesh_mask, drop_variables=drop_vars) as grid: lats = grid.nav_lat[1:, 1:] lons = grid.nav_lon[1:, 1:] + 360 logger.debug(f"lats and lons from: {mesh_mask}") @@ -118,6 +125,7 @@ def make_ww3_current_file(parsed_args, config, *args): "bounds_nav_lat", "depthu_bounds", "depthv_bounds", + "time_centered", "time_centered_bounds", "time_counter_bounds", } @@ -156,9 +164,7 @@ def make_ww3_current_file(parsed_args, config, *args): u_unstaggered, v_unstaggered = viz_tools.unstagger( u_nemo.vozocrtx.isel(depthu=0), v_nemo.vomecrty.isel(depthv=0) ) - del u_unstaggered.coords["time_centered"] del u_unstaggered.coords["depthu"] - del v_unstaggered.coords["time_centered"] del v_unstaggered.coords["depthv"] logger.debug("unstaggered velocity components on to mesh mask lats/lons") u_current, v_current = viz_tools.rotate_vel(u_unstaggered, v_unstaggered) diff --git a/tests/workers/test_make_ww3_current_file.py b/tests/workers/test_make_ww3_current_file.py index e2d1aa95..2fec1b2f 100644 --- a/tests/workers/test_make_ww3_current_file.py +++ b/tests/workers/test_make_ww3_current_file.py @@ -340,7 +340,19 @@ def test_mesh_mask_dataset( m_unstagger.return_value = (MagicMock(), MagicMock()) m_rotate_vel.return_value = (MagicMock(), MagicMock()) make_ww3_current_file.make_ww3_current_file(parsed_args, config) - m_open_dataset.assert_called_once_with("grid/mesh_mask201702.nc") + drop_vars = { + 'gphiu', 'vmask', 'gdept_0', 'gdepw_0', 'umask', 'gphif', 'e3v_0', 'time_counter', + 'isfdraft', 'glamu', 'e1f', 'vmaskutil', 'mbathy', 'e2t', 'e2u', 'e3u_0', 'ff', + 'gdept_1d', + 'gphit', 'e3w_0', 'e1u', 'e1t', 'e2v', 'fmaskutil', 'tmaskutil', 'gdepv', 'misf', + 'gphiv', + 'e3t_1d', 'fmask', 'tmask', 'e3t_0', 'gdepw_1d', 'gdepu', 'glamt', 'glamf', + 'e3w_1d', 'e1v', 'umaskutil', 'glamv', 'e2f', + } + m_open_dataset.assert_called_once_with( + "grid/mesh_mask201702.nc", + drop_variables=drop_vars, + ) class TestCalcNowcastDatasets: From 5a1f534a5bde6bff6bbc4d3844003a9f024370aa Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:24:58 -0700 Subject: [PATCH 09/12] Switch to h5netcdf engine for xarray dataset reads The h5netcdf engine has been set as the engine for opening datasets in 'make_ww3_current_file.py'. The intent is to avoid the netcdf4 package thread-safety issues. The corresponding test cases in 'test_make_ww3_current_file.py' have also been updated to reflect this change. --- nowcast/workers/make_ww3_current_file.py | 4 +- tests/workers/test_make_ww3_current_file.py | 49 ++++++++++++++++++--- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/nowcast/workers/make_ww3_current_file.py b/nowcast/workers/make_ww3_current_file.py index 327a3cc7..0aba9e86 100644 --- a/nowcast/workers/make_ww3_current_file.py +++ b/nowcast/workers/make_ww3_current_file.py @@ -113,7 +113,7 @@ def make_ww3_current_file(parsed_args, config, *args): 'e3t_1d', 'fmask', 'tmask', 'e3t_0', 'gdepw_1d', 'gdepu', 'glamt', 'glamf', 'e3w_1d', 'e1v', 'umaskutil', 'glamv', 'e2f', } - with xarray.open_dataset(mesh_mask, drop_variables=drop_vars) as grid: + with xarray.open_dataset(mesh_mask, drop_variables=drop_vars, engine="h5netcdf") as grid: lats = grid.nav_lat[1:, 1:] lons = grid.nav_lon[1:, 1:] + 360 logger.debug(f"lats and lons from: {mesh_mask}") @@ -150,6 +150,7 @@ def make_ww3_current_file(parsed_args, config, *args): coords="minimal", data_vars="minimal", drop_variables=drop_vars, + engine="h5netcdf", ) as u_nemo: logger.debug(f'u velocities from {datasets["u"]}') with xarray.open_mfdataset( @@ -159,6 +160,7 @@ def make_ww3_current_file(parsed_args, config, *args): coords="minimal", data_vars="minimal", drop_variables=drop_vars, + engine="h5netcdf", ) as v_nemo: logger.debug(f'v velocities from {datasets["v"]}') u_unstaggered, v_unstaggered = viz_tools.unstagger( diff --git a/tests/workers/test_make_ww3_current_file.py b/tests/workers/test_make_ww3_current_file.py index 2fec1b2f..5197b35a 100644 --- a/tests/workers/test_make_ww3_current_file.py +++ b/tests/workers/test_make_ww3_current_file.py @@ -341,17 +341,52 @@ def test_mesh_mask_dataset( m_rotate_vel.return_value = (MagicMock(), MagicMock()) make_ww3_current_file.make_ww3_current_file(parsed_args, config) drop_vars = { - 'gphiu', 'vmask', 'gdept_0', 'gdepw_0', 'umask', 'gphif', 'e3v_0', 'time_counter', - 'isfdraft', 'glamu', 'e1f', 'vmaskutil', 'mbathy', 'e2t', 'e2u', 'e3u_0', 'ff', - 'gdept_1d', - 'gphit', 'e3w_0', 'e1u', 'e1t', 'e2v', 'fmaskutil', 'tmaskutil', 'gdepv', 'misf', - 'gphiv', - 'e3t_1d', 'fmask', 'tmask', 'e3t_0', 'gdepw_1d', 'gdepu', 'glamt', 'glamf', - 'e3w_1d', 'e1v', 'umaskutil', 'glamv', 'e2f', + "gphiu", + "vmask", + "gdept_0", + "gdepw_0", + "umask", + "gphif", + "e3v_0", + "time_counter", + "isfdraft", + "glamu", + "e1f", + "vmaskutil", + "mbathy", + "e2t", + "e2u", + "e3u_0", + "ff", + "gdept_1d", + "gphit", + "e3w_0", + "e1u", + "e1t", + "e2v", + "fmaskutil", + "tmaskutil", + "gdepv", + "misf", + "gphiv", + "e3t_1d", + "fmask", + "tmask", + "e3t_0", + "gdepw_1d", + "gdepu", + "glamt", + "glamf", + "e3w_1d", + "e1v", + "umaskutil", + "glamv", + "e2f", } m_open_dataset.assert_called_once_with( "grid/mesh_mask201702.nc", drop_variables=drop_vars, + engine="h5netcdf", ) From 652d1311849b9d1b70ccb45fd69af00a9998c9d9 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:28:45 -0700 Subject: [PATCH 10/12] Decrease current time_counter chunk size to 1 The chunk size for the time_counter variable in the make_current_wind_file worker has decreased from 3 to 1. Testing showed that the smaller chunk size resulted in slightly faster processing. --- nowcast/workers/make_ww3_current_file.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nowcast/workers/make_ww3_current_file.py b/nowcast/workers/make_ww3_current_file.py index 0aba9e86..0517683d 100644 --- a/nowcast/workers/make_ww3_current_file.py +++ b/nowcast/workers/make_ww3_current_file.py @@ -131,13 +131,13 @@ def make_ww3_current_file(parsed_args, config, *args): } chunks = { "u": { - "time_counter": 3, + "time_counter": 1, "depthu": 40, "y": 898, "x": 398, }, "v": { - "time_counter": 3, + "time_counter": 1, "depthv": 40, "y": 898, "x": 398, From ce0a49477fdbd852926cbae0fb1d01f2153deb38 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:30:40 -0700 Subject: [PATCH 11/12] Use dask processes for wwatch3 currents file generation The currents file creation process for the WWatch3 model has been updated to use processes rather than the default threads for the dask scheduler. Processes have been found to be more reliable for dask operations on the types of workloads we use in SalishSeaCast. --- nowcast/workers/make_ww3_current_file.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nowcast/workers/make_ww3_current_file.py b/nowcast/workers/make_ww3_current_file.py index 0517683d..a2c2edc5 100644 --- a/nowcast/workers/make_ww3_current_file.py +++ b/nowcast/workers/make_ww3_current_file.py @@ -176,6 +176,11 @@ def make_ww3_current_file(parsed_args, config, *args): ) logger.debug("created currents dataset") ds.to_netcdf(os.fspath(nc_filepath)) + dask_scheduler = { + "scheduler": "processes", + "max_workers": 8, + } + ds.compute(**dask_scheduler) logger.debug(f"stored currents forcing file: {nc_filepath}") checklist = { run_type: os.fspath(nc_filepath), From 6cda3111e3794f9112602ea650cc0c18278e6880 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 7 Jun 2024 10:32:45 -0700 Subject: [PATCH 12/12] Use netcdf4 to save dataset in make_current_wind_file Explicitly use netcdf4 as the engine for dataset writing. This avoids incompatibilities in the resulting file that arise if it is written using h5netcdf. --- nowcast/workers/make_ww3_current_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nowcast/workers/make_ww3_current_file.py b/nowcast/workers/make_ww3_current_file.py index a2c2edc5..60ca1b91 100644 --- a/nowcast/workers/make_ww3_current_file.py +++ b/nowcast/workers/make_ww3_current_file.py @@ -175,12 +175,12 @@ def make_ww3_current_file(parsed_args, config, *args): u_current.time_counter, lats, lons, u_current, v_current, datasets ) logger.debug("created currents dataset") - ds.to_netcdf(os.fspath(nc_filepath)) dask_scheduler = { "scheduler": "processes", "max_workers": 8, } ds.compute(**dask_scheduler) + ds.to_netcdf(nc_filepath, engine="netcdf4") logger.debug(f"stored currents forcing file: {nc_filepath}") checklist = { run_type: os.fspath(nc_filepath),