Skip to content

Commit

Permalink
v4.5.13.4 Clean up Python files in FIM pipeline (#1382)
Browse files Browse the repository at this point in the history
  • Loading branch information
mluck authored Jan 3, 2025
1 parent d6ae01d commit 5e20284
Show file tree
Hide file tree
Showing 19 changed files with 180 additions and 126 deletions.
1 change: 0 additions & 1 deletion data/create_vrt_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import argparse
import logging
import os
import sys
from datetime import datetime

from osgeo import gdal
Expand Down
17 changes: 17 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.



## v4.5.13.4 - 2024-01-03 - [PR#1382](https://github.com/NOAA-OWP/inundation-mapping/pull/1382)

Cleans up Python files within `delineate_hydros_and_produce_HAND.sh` to improve performance, especially memory management, including removing unused imports, deleting object references when objects are no longer needed, and removing GDAL from the `fim_process_unit_wb.sh` step of FIM pipeline. Contributes to #1351 and #1376.

### Changes
- `data/create_vrt_file.py` and `tools/pixel_counter.py`: Removes unused import
- `src/`
- `accumulate_headwaters.py`, `add_crosswalk.py`, `adjust_thalweg_lateral.py`, `filter_catchments_and_add_attributes.py`, `heal_bridges_osm.py`, `make_rem.py`, `make_stages_and_catchlist.py`, `mitigate_branch_outlet_backpool.py`, `reachID_grid_to_vector_points.py`, `split_flows.py`, `unique_pixel_and_allocation.py`: Deletes objects no longer in use
- `delineate_hydros_and_produce_HAND.sh`, `run_by_branch.sh`, `run_unit_wb.sh` : Updates arguments
- `getRasterInfoNative.py`: Refactors in `rasterio` (removed `gdal`)
- `tools/evaluate_crosswalk.py`: Deletes objects no longer in use

<br/><br/>


## v4.5.13.3 - 2025-01-03 - [PR#1048](https://github.com/NOAA-OWP/inundation-mapping/pull/1048)

This script produces inundation depths and attempts to overcome the catchment boundary issue by interpolating water surface elevations between catchments. Water surface calculations require the hydroconditioned DEM (`dem_thalwegCond_{}.tif`) for computation, however, this file is not in the standard outputs from fim_pipeline.sh. Therefore, users may have to re-run fim_pipeline.sh with dem_thalwegCond_{}.tif removed from all deny lists.
Expand Down
11 changes: 8 additions & 3 deletions src/accumulate_headwaters.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ def accumulate_flow(
data = src.read(1)
nodata = src.nodata
profile = src.profile
# transform = src.transform
# crs = src.crs
# latlon = crs.to_epsg() == 4326

# Convert the TauDEM flow direction raster to a pyflwdir flow direction array
temp = data.copy()
Expand All @@ -55,17 +52,23 @@ def accumulate_flow(
temp[data == 8] = 2
temp[data == nodata] = 247

del data

temp = temp.astype(np.uint8)

flw = pyflwdir.from_array(temp, ftype='d8')

del temp

# Read the flow direction raster
with rio.open(headwaters_filename) as src:
headwaters = src.read(1)
nodata = src.nodata

flowaccum = flw.accuflux(headwaters, nodata=nodata, direction='up')

del flw

stream = np.where(flowaccum > 0, flow_accumulation_threshold, 0)

# Write the flow accumulation raster
Expand All @@ -76,6 +79,8 @@ def accumulate_flow(
dst.write(flowaccum, 1)
dst2.write(stream, 1)

del flowaccum, stream


if __name__ == '__main__':
parser = argparse.ArgumentParser()
Expand Down
45 changes: 21 additions & 24 deletions src/add_crosswalk.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,21 @@ def add_crosswalk(
output_hydro_table_fileName,
input_huc_fileName,
input_nwmflows_fileName,
input_nwmcatras_fileName,
mannings_n,
input_nwmcat_fileName,
extent,
small_segments_filename,
min_catchment_area,
min_stream_length,
huc_id,
calibration_mode=False,
):
input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True)
input_flows = gpd.read_file(input_flows_fileName, engine="pyogrio", use_arrow=True)
input_huc = gpd.read_file(input_huc_fileName, engine="pyogrio", use_arrow=True)
input_nwmcat = gpd.read_file(input_nwmcat_fileName, engine="pyogrio", use_arrow=True)
input_nwmflows = gpd.read_file(input_nwmflows_fileName, engine="pyogrio", use_arrow=True)
min_catchment_area = float(min_catchment_area) # 0.25#
min_stream_length = float(min_stream_length) # 0.5#

input_catchments = input_catchments.dissolve(by='HydroID').reset_index()

## crosswalk using stream segment midpoint method
input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")

# only reduce nwm catchments to mainstems if running mainstems
if extent == 'MS':
input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]

input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
if input_nwmcat.feature_id.dtype != 'int':
input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
input_nwmcat = input_nwmcat.set_index('feature_id')

input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
if input_nwmflows.feature_id.dtype != 'int':
input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
Expand Down Expand Up @@ -89,6 +72,8 @@ def add_crosswalk(
crosswalk = crosswalk.filter(items=['HydroID', 'feature_id', 'distance'])
crosswalk = crosswalk.merge(input_nwmflows[['order_']], on='feature_id')

del input_nwmflows

if crosswalk.empty:
print("No relevant streams within HUC boundaries.")
sys.exit(FIM_exit_codes.NO_VALID_CROSSWALKS.value)
Expand All @@ -97,6 +82,8 @@ def add_crosswalk(
input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(crosswalk, on='HydroID')

del input_catchments

if output_catchments.empty:
print("No valid catchments remain.")
sys.exit(FIM_exit_codes.NO_VALID_CROSSWALKS.value)
Expand All @@ -105,6 +92,8 @@ def add_crosswalk(
input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(crosswalk, on='HydroID')

del input_flows

# added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
if 'areasqkm' not in output_catchments.columns:
output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)
Expand Down Expand Up @@ -276,6 +265,9 @@ def add_crosswalk(
input_src_base['Bathymetry_source'] = pd.NA

output_src = input_src_base.drop(columns=['CatchId']).copy()

del input_src_base

if output_src.HydroID.dtype != 'int':
output_src.HydroID = output_src.HydroID.astype(int)

Expand Down Expand Up @@ -304,6 +296,9 @@ def add_crosswalk(
)
merged_output_src = merged_output_src[['HydroID', 'Stage', 'Discharge (m3s-1)_df2']]
output_src = pd.merge(output_src, merged_output_src, on=['HydroID', 'Stage'], how='left')

del merged_output_src

output_src['Discharge (m3s-1)'] = output_src['Discharge (m3s-1)_df2'].fillna(
output_src['Discharge (m3s-1)']
)
Expand All @@ -322,8 +317,12 @@ def add_crosswalk(
['Discharge (m3s-1)'],
] = src_stage[1]

del sml_segs

output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')

del crosswalk

output_crosswalk = output_src[['HydroID', 'feature_id']]
output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)

Expand Down Expand Up @@ -386,6 +385,8 @@ def add_crosswalk(
input_huc[FIM_ID] = input_huc[FIM_ID].astype(str)
output_hydro_table = output_hydro_table.merge(input_huc.loc[:, [FIM_ID, 'HUC8']], how='left', on=FIM_ID)

del input_huc

if output_flows.HydroID.dtype != 'str':
output_flows.HydroID = output_flows.HydroID.astype(str)
output_hydro_table = output_hydro_table.merge(
Expand Down Expand Up @@ -428,6 +429,8 @@ def add_crosswalk(
with open(output_src_json_fileName, 'w') as f:
json.dump(output_src_json, f, sort_keys=True, indent=2)

del output_catchments, output_flows, output_src, output_crosswalk, output_hydro_table, output_src_json


if __name__ == '__main__':
parser = argparse.ArgumentParser(
Expand All @@ -452,22 +455,16 @@ def add_crosswalk(
parser.add_argument("-t", "--output-hydro-table-fileName", help="Hydrotable", required=True)
parser.add_argument("-w", "--input-huc-fileName", help="HUC8 boundary", required=True)
parser.add_argument("-b", "--input-nwmflows-fileName", help="Subest NWM burnlines", required=True)
parser.add_argument("-y", "--input-nwmcatras-fileName", help="NWM catchment raster", required=False)
parser.add_argument(
"-m",
"--mannings-n",
help="Mannings n. Accepts single parameter set or list of parameter set in calibration mode. Currently input as csv.",
required=True,
)
parser.add_argument("-u", "--huc-id", help="HUC ID", required=False)
parser.add_argument("-z", "--input-nwmcat-fileName", help="NWM catchment polygon", required=True)
parser.add_argument("-p", "--extent", help="GMS only for now", default="GMS", required=False)
parser.add_argument("-u", "--huc-id", help="HUC ID", required=True)
parser.add_argument(
"-k", "--small-segments-filename", help="output list of short segments", required=True
)
parser.add_argument(
"-c", "--calibration-mode", help="Mannings calibration flag", required=False, action="store_true"
)
parser.add_argument("-e", "--min-catchment-area", help="Minimum catchment area", required=True)
parser.add_argument("-g", "--min-stream-length", help="Minimum stream length", required=True)

Expand Down
4 changes: 4 additions & 0 deletions src/adjust_thalweg_lateral.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w
ndv,
)

del elevation_window, zone_window, cost_window

# --------------------------------------------------------------------------------------------- #

# Specify raster object metadata.
Expand All @@ -124,6 +126,8 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w

dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1)

del dem_window, zone_window, thalweg_window, minimized_dem_window


if __name__ == '__main__':
# Parse arguments.
Expand Down
2 changes: 0 additions & 2 deletions src/delineate_hydros_and_produce_HAND.sh
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,7 @@ python3 $srcDir/add_crosswalk.py \
-w $tempHucDataDir/wbd8_clp.gpkg \
-b $b_arg \
-u $hucNumber \
-y $tempCurrentBranchDataDir/nwm_catchments_proj_subset.tif \
-m $manning_n \
-z $z_arg \
-k $tempCurrentBranchDataDir/small_segments_$current_branch_id.csv \
-e $min_catchment_area \
-g $min_stream_length
Expand Down
6 changes: 6 additions & 0 deletions src/filter_catchments_and_add_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@ def filter_catchments_and_add_attributes(
# filter segments within huc boundary
select_flows = tuple(map(str, map(int, wbd[wbd.HUC8.str.contains(huc_code)][FIM_ID])))

del wbd

if input_flows.HydroID.dtype != 'str':
input_flows.HydroID = input_flows.HydroID.astype(str)
output_flows = input_flows[input_flows.HydroID.str.startswith(select_flows)].copy()

del input_flows

if output_flows.HydroID.dtype != 'int':
output_flows.HydroID = output_flows.HydroID.astype(int)

Expand Down Expand Up @@ -73,6 +77,8 @@ def filter_catchments_and_add_attributes(
print("There are no flowlines in the HUC after stream order filtering.")
sys.exit(FIM_exit_codes.NO_FLOWLINES_EXIST.value) # will send a 61 back

del input_catchments


if __name__ == '__main__':
# Parse arguments.
Expand Down
118 changes: 48 additions & 70 deletions src/getRasterInfoNative.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

# TODO standardize this script

import os.path
import sys
import argparse

from osgeo import gdal, osr
import rasterio as rio
from osgeo import osr


gdal.UseExceptions()
osr.UseExceptions()


"""
Expand Down Expand Up @@ -64,69 +64,47 @@ def ReprojectCoords(coords, src_srs, tgt_srs):
return trans_coords


# get file size function
def sizeof_fmt(num, suffix='B'):
for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
if abs(num) < 1024.0:
return "%3.1f%s%s" % (num, unit, suffix)
num /= 1024.0
return "%.1f%s%s" % (num, 'Y', suffix)


# open dataset
ds = gdal.Open(sys.argv[1])
fsize = sizeof_fmt(os.path.getsize(sys.argv[1]))
cols = ds.RasterXSize
rows = ds.RasterYSize
nodata = ds.GetRasterBand(1).GetNoDataValue()
# stats = ds.GetRasterBand(1).GetStatistics(True, True)

gt = ds.GetGeoTransform()
ext = GetExtent(gt, cols, rows)

src_srs = osr.SpatialReference()
src_srs.ImportFromWkt(ds.GetProjection())
tgt_srs = src_srs.CloneGeogCS()

geo_ext = ReprojectCoords(ext, src_srs, tgt_srs)
lon1 = ext[0][0]
lon2 = ext[2][0]
lat1 = ext[2][1]
lat2 = ext[0][1]

# calculate cellsize
resx = (ext[2][0] - ext[0][0]) / cols
resy = (ext[0][1] - ext[2][1]) / rows

# print out RasterInfos
print(
fsize,
cols,
rows,
nodata,
str(lon1),
str(lat1),
str(lon2),
str(lat2),
"{:.15f}".format(resx),
"{:.15f}".format(resy),
)
# print stats[0],
# print stats[1],
# print stats[2],
# print stats[3],
# print "\"" + str(lat1) + "," + str(lon1) + "," + str(lat2) + "," + str(lon2) + "\""
# print str(lon1),
# print str(lat1),
# print str(lon2),
# print str(lat2),

# if resx < 0.01: # unit is degree
# print "%.15f" % (resx),
# print "%.15f" % (resy)
# else:
# print "%.15f" % (resx),
# print "%.15f" % (resy)

# close dataset
ds = None
def get_raster_info(raster):
# open dataset
with rio.open(raster) as ds:
meta = ds.meta

# fsize = sizeof_fmt(os.path.getsize(sys.argv[1]))
cols = meta.get("width")
rows = meta.get("height")
nodata = meta.get("nodata")

gt = meta.get("transform") # (1336251.3593209502, 10.0, 0.0, 657665.814333962, 0.0, -10.0)

ext = GetExtent(gt.to_gdal(), cols, rows)

lon1 = ext[0][0]
lon2 = ext[2][0]
lat1 = ext[2][1]
lat2 = ext[0][1]

# calculate cellsize
resx = (ext[2][0] - ext[0][0]) / cols
resy = (ext[0][1] - ext[2][1]) / rows

# print out RasterInfos
print(
cols,
rows,
nodata,
str(lon1),
str(lat1),
str(lon2),
str(lat2),
"{:.15f}".format(resx),
"{:.15f}".format(resy),
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Get raster information")
parser.add_argument("-r", "--raster", help="Raster file to get information from")

args = parser.parse_args()

get_raster_info(**vars(args))
Loading

0 comments on commit 5e20284

Please sign in to comment.