diff --git a/ras_stac/__init__.py b/ras_stac/__init__.py index e69de29..da5c0a7 100644 --- a/ras_stac/__init__.py +++ b/ras_stac/__init__.py @@ -0,0 +1 @@ +"""Main modules for the ras_stac package.""" diff --git a/ras_stac/ras_geom_hdf.py b/ras_stac/ras_geom_hdf.py index 2a18f24..2f9c550 100644 --- a/ras_stac/ras_geom_hdf.py +++ b/ras_stac/ras_geom_hdf.py @@ -1,3 +1,5 @@ +"""Module for creating STAC items from HEC-RAS geometry HDF files.""" + from .utils.logger import setup_logging import logging import sys @@ -29,6 +31,7 @@ def new_geom_item( item_props_to_add: dict = None, s3_resource=None, ): + """Create a new STAC item from a HEC-RAS geometry HDF object.""" ras_stac_geom = RasStacGeom(ras_geom_hdf) stac_properties = ras_stac_geom.get_stac_geom_attrs() @@ -80,7 +83,7 @@ def main( item_props_to_remove: list = None, item_props_to_add: dict = None, ): - + """Create stac item from papipyplug input.""" verify_safe_prefix(new_item_s3_path) logging.info(f"Creating geom item: {new_item_s3_path}") diff --git a/ras_stac/ras_plan_hdf.py b/ras_stac/ras_plan_hdf.py index 5ee8170..74d43d5 100644 --- a/ras_stac/ras_plan_hdf.py +++ b/ras_stac/ras_plan_hdf.py @@ -1,3 +1,5 @@ +"""Module for creating STAC items from HEC-RAS plan HDF files.""" + from .utils.logger import setup_logging import logging import sys @@ -25,6 +27,7 @@ def new_plan_item( item_props_to_add: dict = {}, s3_resource=None, ): + """Create a new STAC item from a HEC-RAS plan HDF object.""" ras_stac_plan = RasStacPlan(plan_hdf_obj) stac_properties = ras_stac_plan.get_stac_plan_attrs(item_id) @@ -61,9 +64,7 @@ def main( item_props_to_add: dict = None, item_props_to_remove: list = None, ): - """ - Main function with individual parameters instead of using a dict. - """ + """Create stac item from papipyplug input.""" # Handle optional parameters asset_list = asset_list or [] item_props_to_add = item_props_to_add or {} diff --git a/ras_stac/utils/__init__.py b/ras_stac/utils/__init__.py index e69de29..51d015f 100644 --- a/ras_stac/utils/__init__.py +++ b/ras_stac/utils/__init__.py @@ -0,0 +1 @@ +"""Utility modules for the ras_stac package.""" diff --git a/ras_stac/utils/common.py b/ras_stac/utils/common.py index 64c888c..e97ef14 100644 --- a/ras_stac/utils/common.py +++ b/ras_stac/utils/common.py @@ -1,8 +1,11 @@ +"""Common utility functions & constants for ras_stac.""" + from typing import List, Any import inspect def check_params(func): + """Inspect the parameters of a given function and categorize them as required or optional.""" sig = inspect.signature(func) args = sig.parameters @@ -17,16 +20,18 @@ def check_params(func): def get_dict_values(dicts: List[dict], key: Any) -> list: - """ - This function retrieves the values of a specific key from a list of dictionaries. + """Retrieve the values of a specific key from a list of dictionaries. - Parameters: + Parameters + ---------- dicts (List[dict]): The list of dictionaries. key (Any): The key to retrieve the values of. - Returns: + Returns + ------- List[dict]: A list with the values of the key in the dictionaries. If a dictionary does not have the key, it is skipped. + """ results = [] for d in dicts: diff --git a/ras_stac/utils/logger.py b/ras_stac/utils/logger.py index 159ecbb..2fb2b41 100644 --- a/ras_stac/utils/logger.py +++ b/ras_stac/utils/logger.py @@ -1,7 +1,10 @@ +"""Logging setup and configuration for ras_stac.""" + import logging def setup_logging(): + """Configure logging settings.""" logging.getLogger("boto3").setLevel(logging.WARNING) logging.getLogger("botocore").setLevel(logging.WARNING) diff --git a/ras_stac/utils/ras_utils.py b/ras_stac/utils/ras_utils.py index 4662d1b..f31cc38 100644 --- a/ras_stac/utils/ras_utils.py +++ b/ras_stac/utils/ras_utils.py @@ -1,3 +1,5 @@ +"""Utility functions for processing STAC data.""" + import logging from dotenv import load_dotenv, find_dotenv import pystac @@ -19,17 +21,19 @@ class RasStacGeom: + """Class for creating STAC items from HEC-RAS geometry HDF files.""" + def __init__(self, rg: RasGeomHdf): self.rg = rg def get_stac_geom_attrs(self) -> dict: - """ - Retrieves the geometry attributes of a HEC-RAS HDF file, converting them to STAC format. + """Retrieve the geometry attributes of a HEC-RAS HDF file, converting them to STAC format. - Returns: + Returns + ------- stac_geom_attrs (dict): A dictionary with the organized geometry attributes. - """ + """ stac_geom_attrs = self.rg.get_root_attrs() if stac_geom_attrs is not None: stac_geom_attrs = prep_stac_attrs(stac_geom_attrs) @@ -74,6 +78,7 @@ def get_stac_geom_attrs(self) -> dict: return stac_geom_attrs def get_perimeter(self, simplify: float = None, crs: str = "EPSG:4326"): + """Retrieve the perimeter of a HEC-RAS geometry.""" return ras_perimeter(self.rg, simplify, crs) def to_item( @@ -82,18 +87,20 @@ def to_item( stac_item_id: str, simplify: float = None, ) -> pystac.Item: - """ - Creates a STAC (SpatioTemporal Asset Catalog) item from a given RasGeomHdf object. + """Create a STAC (SpatioTemporal Asset Catalog) item from a given RasGeomHdf object. - Parameters: + Parameters + ---------- item_properties (dict): A dictionary containing the properties for the STAC item. Should include geometry time or runtime window for STAC item date. ras_model_name (str): The name of the RAS model. simplify (float, optional): Tolerance for simplifying the perimeter polygon. Defaults to None. - Returns: + Returns + ------- pystac.Item: The created STAC item. - Raises: + Raises + ------ AttributeError: Raised if neither 'geometry_time' nor 'runtime_window' is present in the provided item properties. The function performs the following steps: @@ -103,8 +110,8 @@ def to_item( 4. If no runtime window is available, it uses the geometry time as the datetime for the STAC item. 5. Converts the perimeter geometry to GeoJSON format and adds it to the STAC item. 6. Returns the created STAC item with the geometry, bounding box, temporal properties, and additional item properties. - """ + """ perimeter_polygon = self.get_perimeter(simplify) runtime_window = item_properties.get("results_summary:run_time_window") @@ -151,20 +158,24 @@ def to_item( class RasStacPlan(RasStacGeom): + """Class for creating STAC items from HEC-RAS plan HDF files.""" + def __init__(self, rp: RasPlanHdf): super().__init__(rp) self.rp = rp def get_plan_attrs(self, include_results: bool = False) -> dict: - """ - This function retrieves the attributes of a plan from a HEC-RAS plan HDF file, converting them to STAC format. + """Retrieve the attributes of a plan from a HEC-RAS plan HDF file, converting them to STAC format. - Parameters: + Parameters + ---------- include_results (bool, optional): Whether to include the results attributes in the returned dictionary. Defaults to False. - Returns: + Returns + ------- stac_plan_attrs (dict): A dictionary with the attributes of the plan. + """ stac_plan_attrs = self.rp.get_root_attrs() if stac_plan_attrs is not None: @@ -204,13 +215,12 @@ def get_plan_attrs(self, include_results: bool = False) -> dict: return stac_plan_attrs def get_plan_results_attrs(self): - """ - This function retrieves the results attributes of a plan from a HEC-RAS plan HDF file, converting - them to STAC format. For summary atrributes, it retrieves the total computation time, the run time window, - and the solution from it, and calculates the total computation time in minutes if it exists. + """Retrieve the results attributes of a plan from a HEC-RAS plan HDF file, converting them to STAC format. - Returns: + Returns + ------- results_attrs (dict): A dictionary with the results attributes of the plan. + """ results_attrs = {} @@ -263,13 +273,14 @@ def get_plan_results_attrs(self): return results_attrs def get_stac_plan_attrs(self, simulation: str) -> dict: - """ - This function retrieves the metadata of a simulation from a HEC-RAS plan HDF file. + """Retrieve the metadata of a simulation from a HEC-RAS plan HDF file. - Parameters: + Parameters + ---------- simulation (str): The name of the simulation. - Returns: + Returns + ------- dict: A dictionary with the metadata of the simulation. The function performs the following steps: @@ -277,6 +288,7 @@ def get_stac_plan_attrs(self, simulation: str) -> dict: 2. Tries to get the plan attributes from the RasPlanHdf object and update the `metadata` dictionary with them. 3. Tries to get the plan results attributes from the RasPlanHdf object and update the `metadata` dictionary with them. 4. Returns the `metadata` dictionary. + """ metadata = {"ras:simulation": simulation} @@ -296,13 +308,14 @@ def get_stac_plan_attrs(self, simulation: str) -> dict: def get_ras_asset_info(s3_key: str) -> dict: - """ - This function generates information about a HEC-RAS model asset including roles, descriptions, and titles. + """Generate information about a HEC-RAS model asset including roles, descriptions, and titles. - Parameters: + Parameters + ---------- s3_key (str): The S3 key of the asset. - Returns: + Returns + ------- dict: A dictionary with the roles, the description, and the title of the asset. The function performs the following steps: @@ -314,6 +327,7 @@ def get_ras_asset_info(s3_key: str) -> dict: and provides a descriptive message for the asset. If the file doesn't match a known pattern, the generic role "ras-file" is assigned. 5. Returns a dictionary with the roles, the description, and the title of the asset. + """ file_extension = Path(s3_key).suffix full_extension = s3_key.rsplit("/")[-1].split(".", 1)[1] @@ -503,14 +517,16 @@ def get_ras_asset_info(s3_key: str) -> dict: def to_snake_case(text): - """ - Convert a string to snake case, removing punctuation and other symbols. + """Convert a string to snake case, removing punctuation and other symbols. - Parameters: + Parameters + ---------- text (str): The string to be converted. - Returns: + Returns + ------- str: The snake case version of the string. + """ import re @@ -524,16 +540,17 @@ def to_snake_case(text): def prep_stac_attrs(attrs: dict, prefix: str = None) -> dict: - """ - Converts an unformatted HDF attributes dictionary to STAC format by converting values to snake case - and adding a prefix if one is given. + """Convert an unformatted HDF attributes dictionary to STAC format by converting values to snake case and adding a prefix if one is given. - Parameters: + Parameters + ---------- attrs (dict): Unformatted attribute dictionary. prefix (str): Optional prefix to be added to each key of formatted dictionary. - Returns: + Returns + ------- results (dict): The new attribute dictionary snake case values and prefix. + """ results = {} for k, value in attrs.items(): @@ -547,19 +564,20 @@ def prep_stac_attrs(attrs: dict, prefix: str = None) -> dict: def ras_perimeter(rg: RasGeomHdf, simplify: float = None, crs: str = "EPSG:4326"): - """ - Calculate the perimeter of a HEC-RAS geometry as a GeoDataFrame in the specified coordinate reference system. + """Calculate the perimeter of a HEC-RAS geometry as a GeoDataFrame in the specified coordinate reference system. - Parameters: + Parameters + ---------- rg (RasGeomHdf): A HEC-RAS geometry HDF file object which provides mesh areas. simplify (float, optional): A tolerance level to simplify the perimeter geometry to reduce complexity. If None, the geometry will not be simplified. Defaults to None. crs (str): The coordinate reference system which the perimeter geometry will be converted to. Defaults to "EPSG:4326". - Returns: + Returns + ------- gpd.GeoDataFrame: A GeoDataFrame containing the calculated perimeter polygon in the specified CRS. - """ + """ perimeter = rg.mesh_areas() perimeter = perimeter.to_crs(crs) if simplify: @@ -570,13 +588,16 @@ def ras_perimeter(rg: RasGeomHdf, simplify: float = None, crs: str = "EPSG:4326" def properties_to_isoformat(properties: dict): - """Converts datetime objects in properties to isoformat + """Convert datetime objects in properties to isoformat. - Parameters: + Parameters + ---------- properties (dict): Properties dictionary with datetime object values - Returns: + Returns + ------- properties (dict): Properties dictionary with datetime objects converted to isoformat + """ for k, v in properties.items(): if isinstance(v, list): @@ -589,10 +610,7 @@ def properties_to_isoformat(properties: dict): def add_assets_to_item(item, asset_list: list, s3_resource: None): - """ - Adds assets to a STAC item using the asset list and fetches metadata from S3. - """ - + """Add assets to a STAC item using the asset list and fetches metadata from S3.""" for asset_file in asset_list: bucket, asset_key = split_s3_path(asset_file) logging.info(f"Adding asset {asset_file} to item") @@ -622,13 +640,13 @@ def add_assets_to_item(item, asset_list: list, s3_resource: None): def cell_area_to_distance(item, properties_to_transform): - """ - Converts the given properties (representing area) to distance by taking the square root - of their values. Capitalizes '2d' to '2D' in the property names. + """Convert the given properties (representing area) to distance by taking the square root of their values. Capitalizes '2d' to '2D' in the property names. - Parameters: + Parameters + ---------- - item: The item thats having its properties transformed. - properties_to_transform: List of properties to transform. + """ for prop in properties_to_transform: capitalized_prop = prop.replace("2d", "2D") diff --git a/ras_stac/utils/s3_utils.py b/ras_stac/utils/s3_utils.py index 7d1ca37..fd4f7b6 100644 --- a/ras_stac/utils/s3_utils.py +++ b/ras_stac/utils/s3_utils.py @@ -1,3 +1,5 @@ +"""S3 utility functions for HEC-RAS HDF and STAC data.""" + import boto3 import botocore import json @@ -15,18 +17,21 @@ def read_geom_hdf_from_s3(ras_geom_hdf_url: str): - """ - Reads a RAS geometry HDF file from an S3 URL. + """Read a RAS geometry HDF file from an S3 URL. - Parameters: + Parameters + ---------- ras_geom_hdf_url (str): The URL of the RAS geometry HDF file. - Returns: + Returns + ------- geom_hdf_obj (RasGeomHdf): The RasGeomHdf object. ras_model_name (str): The RAS model name. - Raises: + Raises + ------ ValueError: If the provided URL does not have a '.hdf' suffix. + """ pattern = r".*\.g[0-9]{2}\.hdf$" if not re.fullmatch(pattern, ras_geom_hdf_url): @@ -43,17 +48,20 @@ def read_geom_hdf_from_s3(ras_geom_hdf_url: str): def read_plan_hdf_from_s3(ras_plan_hdf_url: str): - """ - Reads a RAS plan HDF file from an S3 URL. + """Read a RAS plan HDF file from an S3 URL. - Parameters: + Parameters + ---------- ras_plan_hdf_url (str): The URL of the RAS plan HDF file. - Returns: + Returns + ------- plan_hdf_obj (RasPlanHdf): The RasPlanHdf object. - Raises: + Raises + ------ ValueError: If the provided URL does not have a '.hdf' suffix. + """ pattern = r".*\.p[0-9]{2}\.hdf$" if not re.fullmatch(pattern, ras_plan_hdf_url): @@ -70,15 +78,17 @@ def read_plan_hdf_from_s3(ras_plan_hdf_url: str): def get_basic_object_metadata(obj: ObjectSummary) -> dict: - """ - This function retrieves basic metadata of an AWS S3 object. + """Retrieve basic metadata of an AWS S3 object. - Parameters: + Parameters + ---------- obj (ObjectSummary): The AWS S3 object. - Returns: + Returns + ------- dict: A dictionary with the size, ETag, last modified date, storage platform, region, and storage tier of the object. + """ try: _ = obj.load() @@ -97,10 +107,10 @@ def get_basic_object_metadata(obj: ObjectSummary) -> dict: def copy_item_to_s3(item, s3_path, s3client): - """ - This function copies an item to an AWS S3 bucket. + """Copy stac item to S3 bucket. - Parameters: + Parameters + ---------- item: The item to copy. It must have a `to_dict` method that returns a dictionary representation of it. s3_key (str): The file path in the S3 bucket to copy the item to. @@ -108,6 +118,7 @@ def copy_item_to_s3(item, s3_path, s3client): 1. Initializes a boto3 S3 client and splits the s3_key into the bucket name and the key. 2. Converts the item to a dictionary, serializes it to a JSON string, and encodes it to bytes. 3. Puts the encoded JSON string to the specified file path in the S3 bucket. + """ item_public_url = s3_path_public_url_converter(s3_path) item.set_self_href(item_public_url) @@ -120,13 +131,14 @@ def copy_item_to_s3(item, s3_path, s3client): def split_s3_path(s3_path: str) -> tuple[str, str]: - """ - This function splits an S3 path into the bucket name and the key. + """Split an S3 path into the bucket name and the key. - Parameters: + Parameters + ---------- s3_path (str): The S3 path to split. It should be in the format 's3://bucket/key'. - Returns: + Returns + ------- tuple: A tuple containing the bucket name and the key. If the S3 path does not contain a key, the second element of the tuple will be None. @@ -135,6 +147,7 @@ def split_s3_path(s3_path: str) -> tuple[str, str]: 2. Splits the remaining string on the first '/' character. 3. Returns the first part as the bucket name and the second part as the key. If there is no '/', the key will be None. + """ if not s3_path.startswith("s3://"): raise ValueError(f"s3_path does not start with s3://: {s3_path}") @@ -145,13 +158,14 @@ def split_s3_path(s3_path: str) -> tuple[str, str]: def s3_path_public_url_converter(url: str) -> str: - """ - This function converts an S3 URL to an HTTPS URL and vice versa. + """Convert an S3 URL to an HTTPS URL and vice versa. - Parameters: + Parameters + ---------- url (str): The URL to convert. It should be in the format 's3://bucket/' or 'https://bucket.s3.amazonaws.com/'. - Returns: + Returns + ------- str: The converted URL. If the input URL is an S3 URL, the function returns an HTTPS URL. If the input URL is an HTTPS URL, the function returns an S3 URL. @@ -159,8 +173,8 @@ def s3_path_public_url_converter(url: str) -> str: 1. Checks if the input URL is an S3 URL or an HTTPS URL. 2. If the input URL is an S3 URL, it converts it to an HTTPS URL. 3. If the input URL is an HTTPS URL, it converts it to an S3 URL. - """ + """ if url.startswith("s3"): bucket = url.replace("s3://", "").split("/")[0] key = url.replace(f"s3://{bucket}", "")[1:] @@ -177,10 +191,7 @@ def s3_path_public_url_converter(url: str) -> str: def verify_safe_prefix(s3_key: str): - """ - TODO: discuss this with the team. Would like some safety mechanism to ensure that the S3 key is limited to - certain prefixes. Should there be some restriction where these files can be written? - """ + """TODO: discuss this with the team. Would like some safety mechanism to ensure that the S3 key is limited to certain prefixes.""" parts = s3_key.split("/") logging.debug(f"parts of the s3_key: {parts}") if parts[3] != "stac": @@ -190,7 +201,7 @@ def verify_safe_prefix(s3_key: str): def init_s3_resources(): - # Instantitate S3 resources + """Create a Boto3 session using AWS credentials from environment variables and creates both an S3 client and S3 resource for interacting with AWS S3.""" session = boto3.Session( aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"], aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"], @@ -202,6 +213,19 @@ def init_s3_resources(): def list_keys(s3_client, bucket, prefix, suffix=""): + """List all S3 keys in a specified bucket that match a given prefix and suffix. + + Parameters + ---------- + s3_client (boto3.Client): An S3 client object. + bucket (str): The name of the S3 bucket. + prefix (str): The prefix string to filter keys. + suffix (str, optional): The suffix string to filter keys. Defaults to an empty string. + + Returns + ------- + list: A list of keys in the S3 bucket that match the specified prefix and suffix. + """ keys = [] kwargs = {"Bucket": bucket, "Prefix": prefix} while True: @@ -215,6 +239,19 @@ def list_keys(s3_client, bucket, prefix, suffix=""): def list_keys_regex(s3_client, bucket, prefix_includes, suffix=""): + """List all S3 keys in a specified bucket that match a given regex pattern and suffix. + + Parameters + ---------- + s3_client (boto3.Client): An S3 client object. + bucket (str): The name of the S3 bucket. + prefix_includes (str): A pattern to match the beginning of keys. + suffix (str, optional): The suffix string to filter keys. Defaults to an empty string. + + Returns + ------- + list: A list of keys in the S3 bucket that match the specified regex pattern and suffix. + """ keys = [] kwargs = {"Bucket": bucket, "Prefix": prefix_includes} prefix_pattern = re.compile(prefix_includes.replace("*", ".*")) diff --git a/tests/test_ras_geom.py b/tests/test_ras_geom.py index b95d28e..770ebd0 100644 --- a/tests/test_ras_geom.py +++ b/tests/test_ras_geom.py @@ -21,7 +21,6 @@ def test_geom_item(): - ras_geom_hdf = RasGeomHdf(TEST_GEOM) ras_model_name = "test_model" test_asset = "s3://test_bucket/test_prefix/test_model.f03" diff --git a/tests/test_ras_plan.py b/tests/test_ras_plan.py index 89e8932..c75b77c 100644 --- a/tests/test_ras_plan.py +++ b/tests/test_ras_plan.py @@ -16,7 +16,6 @@ def test_plan_item(): - ras_plan_hdf = RasPlanHdf(TEST_PLAN) ras_model_name = "test_model" test_asset = "s3://test_bucket/test_prefix/test_model.f03" diff --git a/tests/test_utils.py b/tests/test_utils.py index bb7f1b0..938efab 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -7,7 +7,7 @@ def stac_item_to_json(item: pystac.Item, filename: str): - """Writes a STAC item to a JSON file.""" + """Write a STAC item to a JSON file.""" item_json = json.dumps(item.to_dict(), indent=4) with open(filename, "w") as f: f.write(item_json) @@ -16,6 +16,7 @@ def stac_item_to_json(item: pystac.Item, filename: str): def create_perimeter_json( ras_stac_geom: RasStacGeom, output_json_fn: str = "test_perimeter.json" ): + """Create a JSON file with the spatial characteristics of a RAS geometry.""" perimeter = ras_stac_geom.get_perimeter() geometry = json.loads(shapely.to_geojson(perimeter)) bounds = perimeter.bounds