v4.5.13.4 Clean up Python files in FIM pipeline (#1382)

NOAA-OWP · Jan 3, 2025 · 5e20284 · 5e20284
1 parent d6ae01d
commit 5e20284
Show file tree

Hide file tree

Showing 19 changed files with 180 additions and 126 deletions.
diff --git a/data/create_vrt_file.py b/data/create_vrt_file.py
@@ -3,7 +3,6 @@
 import argparse
 import logging
 import os
-import sys
 from datetime import datetime
 
 from osgeo import gdal

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -1,6 +1,23 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
+
+
+## v4.5.13.4 - 2024-01-03 - [PR#1382](https://github.com/NOAA-OWP/inundation-mapping/pull/1382)
+
+Cleans up Python files within `delineate_hydros_and_produce_HAND.sh` to improve performance, especially memory management, including removing unused imports, deleting object references when objects are no longer needed, and removing GDAL from the `fim_process_unit_wb.sh` step of FIM pipeline. Contributes to #1351 and #1376.
+
+### Changes
+- `data/create_vrt_file.py` and `tools/pixel_counter.py`: Removes unused import
+- `src/`
+    - `accumulate_headwaters.py`, `add_crosswalk.py`, `adjust_thalweg_lateral.py`, `filter_catchments_and_add_attributes.py`, `heal_bridges_osm.py`, `make_rem.py`, `make_stages_and_catchlist.py`, `mitigate_branch_outlet_backpool.py`, `reachID_grid_to_vector_points.py`, `split_flows.py`, `unique_pixel_and_allocation.py`: Deletes objects no longer in use
+    - `delineate_hydros_and_produce_HAND.sh`, `run_by_branch.sh`, `run_unit_wb.sh` : Updates arguments
+    - `getRasterInfoNative.py`: Refactors in `rasterio` (removed `gdal`)
+- `tools/evaluate_crosswalk.py`: Deletes objects no longer in use
+
+<br/><br/>
+
+
 ## v4.5.13.3 - 2025-01-03 - [PR#1048](https://github.com/NOAA-OWP/inundation-mapping/pull/1048)
 
 This script produces inundation depths and attempts to overcome the catchment boundary issue by interpolating water surface elevations between catchments. Water surface calculations require the hydroconditioned DEM (`dem_thalwegCond_{}.tif`) for computation, however, this file is not in the standard outputs from fim_pipeline.sh. Therefore, users may have to re-run fim_pipeline.sh with dem_thalwegCond_{}.tif removed from all deny lists.

diff --git a/src/accumulate_headwaters.py b/src/accumulate_headwaters.py
@@ -38,9 +38,6 @@ def accumulate_flow(
         data = src.read(1)
         nodata = src.nodata
         profile = src.profile
-        # transform = src.transform
-        # crs = src.crs
-        # latlon = crs.to_epsg() == 4326
 
     # Convert the TauDEM flow direction raster to a pyflwdir flow direction array
     temp = data.copy()
@@ -55,17 +52,23 @@ def accumulate_flow(
     temp[data == 8] = 2
     temp[data == nodata] = 247
 
+    del data
+
     temp = temp.astype(np.uint8)
 
     flw = pyflwdir.from_array(temp, ftype='d8')
 
+    del temp
+
     # Read the flow direction raster
     with rio.open(headwaters_filename) as src:
         headwaters = src.read(1)
         nodata = src.nodata
 
     flowaccum = flw.accuflux(headwaters, nodata=nodata, direction='up')
 
+    del flw
+
     stream = np.where(flowaccum > 0, flow_accumulation_threshold, 0)
 
     # Write the flow accumulation raster
@@ -76,6 +79,8 @@ def accumulate_flow(
         dst.write(flowaccum, 1)
         dst2.write(stream, 1)
 
+    del flowaccum, stream
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()

diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py
@@ -29,38 +29,21 @@ def add_crosswalk(
     output_hydro_table_fileName,
     input_huc_fileName,
     input_nwmflows_fileName,
-    input_nwmcatras_fileName,
     mannings_n,
-    input_nwmcat_fileName,
-    extent,
     small_segments_filename,
     min_catchment_area,
     min_stream_length,
     huc_id,
-    calibration_mode=False,
 ):
     input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True)
     input_flows = gpd.read_file(input_flows_fileName, engine="pyogrio", use_arrow=True)
     input_huc = gpd.read_file(input_huc_fileName, engine="pyogrio", use_arrow=True)
-    input_nwmcat = gpd.read_file(input_nwmcat_fileName, engine="pyogrio", use_arrow=True)
     input_nwmflows = gpd.read_file(input_nwmflows_fileName, engine="pyogrio", use_arrow=True)
     min_catchment_area = float(min_catchment_area)  # 0.25#
     min_stream_length = float(min_stream_length)  # 0.5#
 
     input_catchments = input_catchments.dissolve(by='HydroID').reset_index()
 
-    ## crosswalk using stream segment midpoint method
-    input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")
-
-    # only reduce nwm catchments to mainstems if running mainstems
-    if extent == 'MS':
-        input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]
-
-    input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
-    if input_nwmcat.feature_id.dtype != 'int':
-        input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
-    input_nwmcat = input_nwmcat.set_index('feature_id')
-
     input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
     if input_nwmflows.feature_id.dtype != 'int':
         input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
@@ -89,6 +72,8 @@ def add_crosswalk(
     crosswalk = crosswalk.filter(items=['HydroID', 'feature_id', 'distance'])
     crosswalk = crosswalk.merge(input_nwmflows[['order_']], on='feature_id')
 
+    del input_nwmflows
+
     if crosswalk.empty:
         print("No relevant streams within HUC boundaries.")
         sys.exit(FIM_exit_codes.NO_VALID_CROSSWALKS.value)
@@ -97,6 +82,8 @@ def add_crosswalk(
         input_catchments.HydroID = input_catchments.HydroID.astype(int)
     output_catchments = input_catchments.merge(crosswalk, on='HydroID')
 
+    del input_catchments
+
     if output_catchments.empty:
         print("No valid catchments remain.")
         sys.exit(FIM_exit_codes.NO_VALID_CROSSWALKS.value)
@@ -105,6 +92,8 @@ def add_crosswalk(
         input_flows.HydroID = input_flows.HydroID.astype(int)
     output_flows = input_flows.merge(crosswalk, on='HydroID')
 
+    del input_flows
+
     # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
     if 'areasqkm' not in output_catchments.columns:
         output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)
@@ -276,6 +265,9 @@ def add_crosswalk(
     input_src_base['Bathymetry_source'] = pd.NA
 
     output_src = input_src_base.drop(columns=['CatchId']).copy()
+
+    del input_src_base
+
     if output_src.HydroID.dtype != 'int':
         output_src.HydroID = output_src.HydroID.astype(int)
 
@@ -304,6 +296,9 @@ def add_crosswalk(
             )
             merged_output_src = merged_output_src[['HydroID', 'Stage', 'Discharge (m3s-1)_df2']]
             output_src = pd.merge(output_src, merged_output_src, on=['HydroID', 'Stage'], how='left')
+
+            del merged_output_src
+
             output_src['Discharge (m3s-1)'] = output_src['Discharge (m3s-1)_df2'].fillna(
                 output_src['Discharge (m3s-1)']
             )
@@ -322,8 +317,12 @@ def add_crosswalk(
                         ['Discharge (m3s-1)'],
                     ] = src_stage[1]
 
+    del sml_segs
+
     output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')
 
+    del crosswalk
+
     output_crosswalk = output_src[['HydroID', 'feature_id']]
     output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)
 
@@ -386,6 +385,8 @@ def add_crosswalk(
         input_huc[FIM_ID] = input_huc[FIM_ID].astype(str)
     output_hydro_table = output_hydro_table.merge(input_huc.loc[:, [FIM_ID, 'HUC8']], how='left', on=FIM_ID)
 
+    del input_huc
+
     if output_flows.HydroID.dtype != 'str':
         output_flows.HydroID = output_flows.HydroID.astype(str)
     output_hydro_table = output_hydro_table.merge(
@@ -428,6 +429,8 @@ def add_crosswalk(
     with open(output_src_json_fileName, 'w') as f:
         json.dump(output_src_json, f, sort_keys=True, indent=2)
 
+    del output_catchments, output_flows, output_src, output_crosswalk, output_hydro_table, output_src_json
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
@@ -452,22 +455,16 @@ def add_crosswalk(
     parser.add_argument("-t", "--output-hydro-table-fileName", help="Hydrotable", required=True)
     parser.add_argument("-w", "--input-huc-fileName", help="HUC8 boundary", required=True)
     parser.add_argument("-b", "--input-nwmflows-fileName", help="Subest NWM burnlines", required=True)
-    parser.add_argument("-y", "--input-nwmcatras-fileName", help="NWM catchment raster", required=False)
     parser.add_argument(
         "-m",
         "--mannings-n",
         help="Mannings n. Accepts single parameter set or list of parameter set in calibration mode. Currently input as csv.",
         required=True,
     )
-    parser.add_argument("-u", "--huc-id", help="HUC ID", required=False)
-    parser.add_argument("-z", "--input-nwmcat-fileName", help="NWM catchment polygon", required=True)
-    parser.add_argument("-p", "--extent", help="GMS only for now", default="GMS", required=False)
+    parser.add_argument("-u", "--huc-id", help="HUC ID", required=True)
     parser.add_argument(
         "-k", "--small-segments-filename", help="output list of short segments", required=True
     )
-    parser.add_argument(
-        "-c", "--calibration-mode", help="Mannings calibration flag", required=False, action="store_true"
-    )
     parser.add_argument("-e", "--min-catchment-area", help="Minimum catchment area", required=True)
     parser.add_argument("-g", "--min-stream-length", help="Minimum stream length", required=True)
 

diff --git a/src/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py
@@ -98,6 +98,8 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w
                     ndv,
                 )
 
+                del elevation_window, zone_window, cost_window
+
             # --------------------------------------------------------------------------------------------- #
 
         # Specify raster object metadata.
@@ -124,6 +126,8 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w
 
                 dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1)
 
+                del dem_window, zone_window, thalweg_window, minimized_dem_window
+
 
 if __name__ == '__main__':
     # Parse arguments.

diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh
@@ -243,9 +243,7 @@ python3 $srcDir/add_crosswalk.py \
     -w $tempHucDataDir/wbd8_clp.gpkg \
     -b $b_arg \
     -u $hucNumber \
-    -y $tempCurrentBranchDataDir/nwm_catchments_proj_subset.tif \
     -m $manning_n \
-    -z $z_arg \
     -k $tempCurrentBranchDataDir/small_segments_$current_branch_id.csv \
     -e $min_catchment_area \
     -g $min_stream_length

diff --git a/src/filter_catchments_and_add_attributes.py b/src/filter_catchments_and_add_attributes.py
@@ -28,10 +28,14 @@ def filter_catchments_and_add_attributes(
     # filter segments within huc boundary
     select_flows = tuple(map(str, map(int, wbd[wbd.HUC8.str.contains(huc_code)][FIM_ID])))
 
+    del wbd
+
     if input_flows.HydroID.dtype != 'str':
         input_flows.HydroID = input_flows.HydroID.astype(str)
     output_flows = input_flows[input_flows.HydroID.str.startswith(select_flows)].copy()
 
+    del input_flows
+
     if output_flows.HydroID.dtype != 'int':
         output_flows.HydroID = output_flows.HydroID.astype(int)
 
@@ -73,6 +77,8 @@ def filter_catchments_and_add_attributes(
         print("There are no flowlines in the HUC after stream order filtering.")
         sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value)  # will send a 61 back
 
+    del input_catchments
+
 
 if __name__ == '__main__':
     # Parse arguments.

diff --git a/src/getRasterInfoNative.py b/src/getRasterInfoNative.py
@@ -3,13 +3,13 @@
 
 # TODO standardize this script
 
-import os.path
-import sys
+import argparse
 
-from osgeo import gdal, osr
+import rasterio as rio
+from osgeo import osr
 
 
-gdal.UseExceptions()
+osr.UseExceptions()
 
 
 """
@@ -64,69 +64,47 @@ def ReprojectCoords(coords, src_srs, tgt_srs):
     return trans_coords
 
 
-# get file size function
-def sizeof_fmt(num, suffix='B'):
-    for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
-        if abs(num) < 1024.0:
-            return "%3.1f%s%s" % (num, unit, suffix)
-        num /= 1024.0
-    return "%.1f%s%s" % (num, 'Y', suffix)
-
-
-# open dataset
-ds = gdal.Open(sys.argv[1])
-fsize = sizeof_fmt(os.path.getsize(sys.argv[1]))
-cols = ds.RasterXSize
-rows = ds.RasterYSize
-nodata = ds.GetRasterBand(1).GetNoDataValue()
-# stats = ds.GetRasterBand(1).GetStatistics(True, True)
-
-gt = ds.GetGeoTransform()
-ext = GetExtent(gt, cols, rows)
-
-src_srs = osr.SpatialReference()
-src_srs.ImportFromWkt(ds.GetProjection())
-tgt_srs = src_srs.CloneGeogCS()
-
-geo_ext = ReprojectCoords(ext, src_srs, tgt_srs)
-lon1 = ext[0][0]
-lon2 = ext[2][0]
-lat1 = ext[2][1]
-lat2 = ext[0][1]
-
-# calculate cellsize
-resx = (ext[2][0] - ext[0][0]) / cols
-resy = (ext[0][1] - ext[2][1]) / rows
-
-# print out RasterInfos
-print(
-    fsize,
-    cols,
-    rows,
-    nodata,
-    str(lon1),
-    str(lat1),
-    str(lon2),
-    str(lat2),
-    "{:.15f}".format(resx),
-    "{:.15f}".format(resy),
-)
-# print stats[0],
-# print stats[1],
-# print stats[2],
-# print stats[3],
-# print "\"" + str(lat1) + "," + str(lon1) + "," + str(lat2) + "," + str(lon2) + "\""
-# print str(lon1),
-# print str(lat1),
-# print str(lon2),
-# print str(lat2),
-
-# if resx < 0.01: # unit is degree
-# 	print "%.15f"  % (resx),
-# 	print "%.15f" % (resy)
-# else:
-# 	print "%.15f" % (resx),
-# 	print "%.15f" % (resy)
-
-# close dataset
-ds = None
+def get_raster_info(raster):
+    # open dataset
+    with rio.open(raster) as ds:
+        meta = ds.meta
+
+    # fsize = sizeof_fmt(os.path.getsize(sys.argv[1]))
+    cols = meta.get("width")
+    rows = meta.get("height")
+    nodata = meta.get("nodata")
+
+    gt = meta.get("transform")  # (1336251.3593209502, 10.0, 0.0, 657665.814333962, 0.0, -10.0)
+
+    ext = GetExtent(gt.to_gdal(), cols, rows)
+
+    lon1 = ext[0][0]
+    lon2 = ext[2][0]
+    lat1 = ext[2][1]
+    lat2 = ext[0][1]
+
+    # calculate cellsize
+    resx = (ext[2][0] - ext[0][0]) / cols
+    resy = (ext[0][1] - ext[2][1]) / rows
+
+    # print out RasterInfos
+    print(
+        cols,
+        rows,
+        nodata,
+        str(lon1),
+        str(lat1),
+        str(lon2),
+        str(lat2),
+        "{:.15f}".format(resx),
+        "{:.15f}".format(resy),
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Get raster information")
+    parser.add_argument("-r", "--raster", help="Raster file to get information from")
+
+    args = parser.parse_args()
+
+    get_raster_info(**vars(args))