rosepearson · rosepearson · Oct 11, 2023 · Oct 9, 2023 · Oct 9, 2023 · Oct 10, 2023
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "geofabrics"
-version = "1.0.0"
+version = "1.0.1"
 description = "A package for creating geofabrics for flood modelling."
 readme = "README.md"
 authors = [{ name = "Rose pearson", email = "[email protected]" }]

diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py
diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py
@@ -11,6 +11,7 @@
 import abc
 import logging
 import distributed
+import dask.dataframe
 import rioxarray
 import copy
 import geopandas
@@ -870,16 +871,6 @@ def run(self):
             elevation_range=self.get_instruction_general("elevation_range"),
         )
 
-        # Load in LiDAR tiles
-        self.raw_dem.add_lidar(
-            lidar_datasets_info=lidar_datasets_info,
-            lidar_classifications_to_keep=self.get_instruction_general(
-                "lidar_classifications_to_keep"
-            ),
-            chunk_size=self.get_processing_instructions("chunk_size"),
-            metadata=self.create_metadata(),
-        )  # Note must be called after all others if it is to be complete
-
         # Setup Dask cluster and client - LAZY SAVE LIDAR DEM
         cluster_kwargs = {
             "n_workers": self.get_processing_instructions("number_of_cores"),
@@ -892,8 +883,32 @@ def run(self):
             print("Dask client:", client)
             print("Dask dashboard:", client.dashboard_link)
 
+            # Load in LiDAR tiles
+            self.raw_dem.add_lidar(
+                lidar_datasets_info=lidar_datasets_info,
+                lidar_classifications_to_keep=self.get_instruction_general(
+                    "lidar_classifications_to_keep"
+                ),
+                chunk_size=self.get_processing_instructions("chunk_size"),
+                metadata=self.create_metadata(),
+            )  # Note must be called after all others if it is to be complete
+
+            # Add a coarse DEM if significant area without LiDAR and a coarse DEM
+            if self.check_vector_or_raster(key="coarse_dems", api_type="raster"):
+                coarse_dem_paths = self.get_vector_or_raster_paths(
+                    key="coarse_dems", data_type="raster"
+                )
+
+                # Add coarse DEMs if there are any and if area
+                self.raw_dem.add_coarse_dems(
+                    coarse_dem_paths=coarse_dem_paths,
+                    area_threshold=area_threshold,
+                    buffer_cells=self.get_instruction_general("lidar_buffer"),
+                    chunk_size=self.get_processing_instructions("chunk_size"),
+                )
+
             # compute and save raw DEM
-            logging.info("In processor.DemGenerator - write out the raw DEM from LiDAR")
+            logging.info("In processor.DemGenerator - write out the raw DEM to netCDF")
             try:
                 self.raw_dem.dem.to_netcdf(
                     self.get_instruction_path("raw_dem"),
@@ -910,46 +925,6 @@ def run(self):
                 )
                 raise caught_exception
 
-        # Add a coarse DEM if significant area without LiDAR and a coarse DEM
-        if self.check_vector_or_raster(key="coarse_dems", api_type="raster"):
-            coarse_dem_paths = self.get_vector_or_raster_paths(
-                key="coarse_dems", data_type="raster"
-            )
-
-            # Add coarse DEMs if there are any and if area
-            self.raw_dem.add_coarse_dems(
-                coarse_dem_paths=coarse_dem_paths,
-                area_threshold=area_threshold,
-                buffer_cells=self.get_instruction_general("lidar_buffer"),
-                chunk_size=self.get_processing_instructions("chunk_size"),
-            )
-
-            # Run dask a second time over the coarse DEM if added
-            cluster = distributed.LocalCluster(**cluster_kwargs)
-            with cluster, distributed.Client(cluster) as client:
-                print("Dask client:", client)
-                print("Dask dashboard:", client.dashboard_link)
-
-                # compute and save raw DEM
-                logging.info(
-                    "In processor.DemGenerator - write out the raw DEM from Coarse"
-                )
-                try:
-                    self.raw_dem.dem.to_netcdf(
-                        self.get_instruction_path("raw_dem"),
-                        format="NETCDF4",
-                        engine="netcdf4",
-                    )
-                except (Exception, KeyboardInterrupt) as caught_exception:
-                    pathlib.Path(self.get_instruction_path("raw_dem")).unlink()
-                    logging.info(
-                        f"Caught error {caught_exception} and deleting"
-                        "partially created netCDF output "
-                        f"{self.get_instruction_path('raw_dem')}"
-                        " before re-raising error."
-                    )
-                    raise caught_exception
-
         if self.debug:
             # Record the parameter used during execution - append to existing
             with open(
@@ -1106,27 +1081,50 @@ def run(self):
         # create the catchment geometry object
         self.catchment_geometry = self.create_catchment()
 
-        # setup the hydrologically conditioned DEM generator
-        self.hydrologic_dem = dem.HydrologicallyConditionedDem(
-            catchment_geometry=self.catchment_geometry,
-            raw_dem_path=self.get_instruction_path("raw_dem"),
-            interpolation_method=self.get_instruction_general(
-                key="interpolation", subkey="no_data"
-            ),
-        )
+        # Setup Dask cluster and client - LAZY SAVE LIDAR DEM
+        cluster_kwargs = {
+            "n_workers": self.get_processing_instructions("number_of_cores"),
+            "threads_per_worker": 1,
+            "processes": True,
+            "memory_limit": self.get_processing_instructions("memory_limit"),
+        }
+        cluster = distributed.LocalCluster(**cluster_kwargs)
+        with cluster, distributed.Client(cluster) as client:
+            print("Dask client:", client)
+            print("Dask dashboard:", client.dashboard_link)
 
-        # Check for and add any bathymetry information
-        self.add_bathymetry(
-            area_threshold=area_threshold,
-            catchment_dirs=self.get_instruction_path("extents"),
-        )
+            # setup the hydrologically conditioned DEM generator
+            self.hydrologic_dem = dem.HydrologicallyConditionedDem(
+                catchment_geometry=self.catchment_geometry,
+                raw_dem_path=self.get_instruction_path("raw_dem"),
+                interpolation_method=self.get_instruction_general(
+                    key="interpolation", subkey="no_data"
+                ),
+            )
 
-        # fill combined dem - save results
-        self.hydrologic_dem.dem.to_netcdf(
-            self.get_instruction_path("result_dem"),
-            format="NETCDF4",
-            engine="netcdf4",
-        )
+            # Check for and add any bathymetry information
+            self.add_bathymetry(
+                area_threshold=area_threshold,
+                catchment_dirs=self.get_instruction_path("extents"),
+            )
+
+            # fill combined dem - save results
+            logging.info("In processor.DemGenerator - write out the raw DEM to netCDF")
+            try:
+                self.hydrologic_dem.dem.to_netcdf(
+                    self.get_instruction_path("result_dem"),
+                    format="NETCDF4",
+                    engine="netcdf4",
+                )
+            except (Exception, KeyboardInterrupt) as caught_exception:
+                pathlib.Path(self.get_instruction_path("raw_dem")).unlink()
+                logging.info(
+                    f"Caught error {caught_exception} and deleting"
+                    "partially created netCDF output "
+                    f"{self.get_instruction_path('raw_dem')}"
+                    " before re-raising error."
+                )
+                raise caught_exception
         if self.debug:
             # Record the parameter used during execution - append to existing
             with open(
@@ -1172,26 +1170,6 @@ def run(self):
         # Get LiDAR data file-list - this may involve downloading lidar files
         lidar_datasets_info = self.get_lidar_datasets_info()
 
-        # setup the roughness DEM generator
-        self.roughness_dem = dem.RoughnessDem(
-            catchment_geometry=self.catchment_geometry,
-            hydrological_dem_path=self.get_instruction_path("result_dem"),
-            elevation_range=self.get_instruction_general("elevation_range"),
-            interpolation_method=self.get_instruction_general(
-                key="interpolation", subkey="no_data"
-            ),
-        )
-
-        # Load in LiDAR tiles
-        self.roughness_dem.add_lidar(
-            lidar_datasets_info=lidar_datasets_info,
-            lidar_classifications_to_keep=self.get_instruction_general(
-                "lidar_classifications_to_keep"
-            ),
-            chunk_size=self.get_processing_instructions("chunk_size"),
-            metadata=self.create_metadata(),
-        )  # Note must be called after all others if it is to be complete
-
         # Setup Dask cluster and client
         cluster_kwargs = {
             "n_workers": self.get_processing_instructions("number_of_cores"),
@@ -1203,6 +1181,27 @@ def run(self):
         with cluster, distributed.Client(cluster) as client:
             print("Dask client:", client)
             print("Dask dashboard:", client.dashboard_link)
+
+            # setup the roughness DEM generator
+            self.roughness_dem = dem.RoughnessDem(
+                catchment_geometry=self.catchment_geometry,
+                hydrological_dem_path=self.get_instruction_path("result_dem"),
+                elevation_range=self.get_instruction_general("elevation_range"),
+                interpolation_method=self.get_instruction_general(
+                    key="interpolation", subkey="no_data"
+                ),
+            )
+
+            # Load in LiDAR tiles
+            self.roughness_dem.add_lidar(
+                lidar_datasets_info=lidar_datasets_info,
+                lidar_classifications_to_keep=self.get_instruction_general(
+                    "lidar_classifications_to_keep"
+                ),
+                chunk_size=self.get_processing_instructions("chunk_size"),
+                metadata=self.create_metadata(),
+            )  # Note must be called after all others if it is to be complete
+
             # save results
             try:
                 self.roughness_dem.dem.to_netcdf(
@@ -2685,6 +2684,9 @@ def sample_location_down_slope(row):
 
             return sampled_multipoints
 
+        """open_waterways = dask.dataframe.from_pandas( # To incorporate later
+            open_waterways,
+            npartitions=self.get_processing_instructions("number_of_cores"))"""
         open_waterways["points"] = open_waterways.apply(
             lambda row: sample_location_down_slope(row=row),
             axis=1,

diff --git a/src/geofabrics/version.py b/src/geofabrics/version.py
@@ -3,4 +3,4 @@
 Contains the package version information
 """
 
-__version__ = "1.0.0"
+__version__ = "1.0.1"
diff --git a/tests/test_dem_generation_local_1/data/benchmark_dem.nc b/tests/test_dem_generation_local_1/data/benchmark_dem.nc
diff --git a/tests/test_dem_generation_local_3/data/benchmark_dem.nc b/tests/test_dem_generation_local_3/data/benchmark_dem.nc
diff --git a/tests/test_many_stages_local/data/benchmark_geofabric.nc b/tests/test_many_stages_local/data/benchmark_geofabric.nc
diff --git a/tests/test_many_stages_multiple_1/data/benchmark.nc b/tests/test_many_stages_multiple_1/data/benchmark.nc
diff --git a/tests/test_many_stages_multiple_2/data/benchmark.nc b/tests/test_many_stages_multiple_2/data/benchmark.nc
diff --git a/tests/test_many_stages_wellington/data/benchmark.nc b/tests/test_many_stages_wellington/data/benchmark.nc
diff --git a/tests/test_many_stages_wellington/data/benchmark_geofabric.nc b/tests/test_many_stages_wellington/data/benchmark_geofabric.nc
diff --git a/tests/test_many_stages_wellington/instruction.json b/tests/test_many_stages_wellington/instruction.json
@@ -187,7 +187,7 @@
 			"extents": "catchment.geojson",
 			"result_dem": "test_dem.nc",
 			"result_geofabric": "test_geofabric.nc",
-			"benchmark_geofabric": "benchmark_geofabric.nc"
+			"benchmark": "benchmark.nc"
 		},
 		"datasets": {
 			"lidar": {

diff --git a/tests/test_many_stages_wellington/test_many_stages_wellington.py b/tests/test_many_stages_wellington/test_many_stages_wellington.py
@@ -115,8 +115,7 @@ def test_result_geofabric_windows(self):
 
         # Load in benchmark
         file_path = (
-            self.cache_dir
-            / self.instructions["roughness"]["data_paths"]["benchmark_geofabric"]
+            self.cache_dir / self.instructions["roughness"]["data_paths"]["benchmark"]
         )
         with rioxarray.rioxarray.open_rasterio(file_path, masked=True) as benchmark:
             benchmark.load()
@@ -165,8 +164,7 @@ def test_result_geofabric_linux(self):
 
         # load in benchmark
         file_path = (
-            self.cache_dir
-            / self.instructions["roughness"]["data_paths"]["benchmark_geofabric"]
+            self.cache_dir / self.instructions["roughness"]["data_paths"]["benchmark"]
         )
         with rioxarray.rioxarray.open_rasterio(file_path, masked=True) as benchmark:
             benchmark.load()