From 0fcf2ea0cb587b308259d5bae38441f419c62155 Mon Sep 17 00:00:00 2001 From: Carson Davis Date: Mon, 25 Nov 2024 19:01:36 -0600 Subject: [PATCH] handle network errors and arbitrary errors during title resolution --- sde_collections/models/delta_patterns.py | 2 +- sde_collections/utils/title_resolver.py | 46 +++++++++++++----------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/sde_collections/models/delta_patterns.py b/sde_collections/models/delta_patterns.py index 0d2310a3..ae3e92ea 100644 --- a/sde_collections/models/delta_patterns.py +++ b/sde_collections/models/delta_patterns.py @@ -425,7 +425,7 @@ def generate_title_for_url(self, url_obj) -> tuple[str, str | None]: try: return resolve_title(self.title_pattern, context), None - except (ValueError, ValidationError) as e: + except Exception as e: return None, str(e) def apply(self) -> None: diff --git a/sde_collections/utils/title_resolver.py b/sde_collections/utils/title_resolver.py index 20211bf7..165065d9 100644 --- a/sde_collections/utils/title_resolver.py +++ b/sde_collections/utils/title_resolver.py @@ -63,29 +63,33 @@ def resolve_xpath(xpath: str, url: str) -> str: if not is_valid_xpath(xpath): raise ValueError(f"The xpath, {xpath}, is not valid.") - response = requests.get(url) - - if response.ok: - tree = html.fromstring(response.content) - values = tree.xpath(xpath) - - if len(values) == 1: - if isinstance(values[0], str): - text_content = values[0] - else: - text_content = values[0].text - - if text_content: - text_content = clean_text(text_content) - return text_content + try: + response = requests.get(url) + + if response.ok: + tree = html.fromstring(response.content) + values = tree.xpath(xpath) + + if len(values) == 1: + if isinstance(values[0], str): + text_content = values[0] + else: + text_content = values[0].text + + if text_content: + text_content = clean_text(text_content) + return text_content + else: + raise ValueError(f"The element at the xpath, {xpath}, does not contain any text content.") + elif len(values) > 1: + raise ValueError(f"More than one element found for the xpath, {xpath}") else: - raise ValueError(f"The element at the xpath, {xpath}, does not contain any text content.") - elif len(values) > 1: - raise ValueError(f"More than one element found for the xpath, {xpath}") + raise ValueError(f"No element found for the xpath, {xpath}") else: - raise ValueError(f"No element found for the xpath, {xpath}") - else: - raise ValueError(f"Failed to retrieve the {url}. Status code: {response.status_code}") + raise ValueError(f"Failed to retrieve the {url}. Status code: {response.status_code}") + + except requests.RequestException as e: + raise ValueError(f"Network error while accessing {url}: {str(e)}") def parse_title(input_string: str) -> list[tuple[str, str]]: