From 2bf12372fc6292c8589d7511212b8fe0a3765a6c Mon Sep 17 00:00:00 2001 From: Pedro Guimaraes Date: Mon, 15 Apr 2024 18:39:41 +0200 Subject: [PATCH 1/3] Add error handling to _request --- .../opensearch/v0/opensearch_backups.py | 56 +++++++++++++------ 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/lib/charms/opensearch/v0/opensearch_backups.py b/lib/charms/opensearch/v0/opensearch_backups.py index ad3275da8..44b696fdf 100644 --- a/lib/charms/opensearch/v0/opensearch_backups.py +++ b/lib/charms/opensearch/v0/opensearch_backups.py @@ -252,7 +252,7 @@ def _close_indices_if_needed(self, backup_id: int) -> Set[str]: OpenSearchHttpError OpenSearchRestoreIndexClosingError """ - backup_indices = self._list_backups()[backup_id]["indices"] + backup_indices = self._list_backups().get(backup_id, {}).get("indices", {}) indices_to_close = set() for index, state in ClusterState.indices(self.charm.opensearch).items(): if ( @@ -271,7 +271,7 @@ def _close_indices_if_needed(self, backup_id: int) -> Set[str]: def _restore(self, backup_id: int) -> Dict[str, Any]: """Runs the restore and processes the response.""" - backup_indices = self._list_backups()[backup_id]["indices"] + backup_indices = self._list_backups().get(backup_id, {}).get("indices", {}) output = self._request( "POST", f"_snapshot/{S3_REPOSITORY}/{backup_id}/_restore?wait_for_completion=true", @@ -301,6 +301,8 @@ def _is_restore_complete(self) -> bool: Essentially, check for each index shard: for all type=SNAPSHOT and stage=DONE, return True. """ indices_status = self._request("GET", "/_recovery?human") + if not indices_status: + raise OpenSearchRestoreCheckError("_is_restore_complete: failed to get indices status") for info in indices_status.values(): # Now, check the status of each shard for shard in info["shards"]: @@ -320,13 +322,17 @@ def _is_backup_available_for_restore(self, backup_id: int) -> bool: except OpenSearchListBackupError: return False - def _on_restore_backup_action(self, event: ActionEvent) -> None: + def _on_restore_backup_action(self, event: ActionEvent) -> None: # noqa #C901 """Restores a backup to the current cluster.""" if not self._can_unit_perform_backup(event): event.fail("Failed: backup service is not configured yet") return - if not self._is_restore_complete(): - event.fail("Failed: previous restore is still in progress") + try: + if not self._is_restore_complete(): + event.fail("Failed: previous restore is still in progress") + return + except OpenSearchRestoreCheckError: + event.fail("Failed: error connecting to the cluster") return # Now, validate the backup is working backup_id = str(event.params.get("backup-id")) @@ -365,7 +371,13 @@ def _on_restore_backup_action(self, event: ActionEvent) -> None: event.fail("Failed to restore all the shards") return - msg = "Restore is complete" if self._is_restore_complete() else "Restore in progress..." + try: + msg = ( + "Restore is complete" if self._is_restore_complete() else "Restore in progress..." + ) + except OpenSearchRestoreCheckError: + event.fail("Failed: error connecting to the cluster") + return self.charm.status.clear(RestoreInProgress) event.set_results( {"backup-id": backup_id, "status": msg, "closed-indices": str(closed_idx)} @@ -430,7 +442,9 @@ def _can_unit_perform_backup(self, event: ActionEvent) -> bool: def _list_backups(self) -> Dict[int, str]: """Returns a mapping of snapshot ids / state.""" - response = self._request("GET", f"_snapshot/{S3_REPOSITORY}/_all") + # Using the original request method, as we want to raise an http exception if we + # cannot get the snapshot list. + response = self.charm.opensearch.request("GET", f"_snapshot/{S3_REPOSITORY}/_all") return { snapshot["snapshot"]: { "state": snapshot["state"], @@ -685,30 +699,35 @@ def can_use_s3_repository(self) -> bool: return False return True - def _request(self, *args, **kwargs) -> str: + def _request(self, *args, **kwargs) -> str | None: """Returns the output of OpenSearchDistribution.request() or throws an error. Request method can return one of many: Union[Dict[str, any], List[any], int] and raise multiple types of errors. If int is returned, then throws an exception informing the HTTP request failed. + If the request fails, returns the error text or None if only status code is found. Raises: - ValueError - - OpenSearchHttpError """ if "retries" not in kwargs.keys(): kwargs["retries"] = 6 if "timeout" not in kwargs.keys(): kwargs["timeout"] = 10 - result = self.charm.opensearch.request(*args, **kwargs) - - # If the return is an int type, then there was a request error: - if isinstance(result, int): - raise OpenSearchHttpError(f"Request failed with code {result}") + # We are interested to see the entire response + kwargs["resp_status_code"] = False + try: + result = self.charm.opensearch.request(*args, **kwargs) + except OpenSearchHttpError as e: + if not e.response_text: + return None + return e.response_text return result - def get_service_status(self, response: Dict[str, Any]) -> BackupServiceState: # noqa: C901 + def get_service_status( # noqa: C901 + self, response: dict[str, Any] | None + ) -> BackupServiceState: """Returns the response status in a Enum. Based on: @@ -719,6 +738,9 @@ def get_service_status(self, response: Dict[str, Any]) -> BackupServiceState: # ba78d93acf1da6dae16952d8978de87cb4df2c61/ plugins/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/40_repository_ec2_credentials.yml """ + if not response: + return BackupServiceState.SNAPSHOT_FAILED_UNKNOWN + try: if "error" not in response: return BackupServiceState.SUCCESS @@ -759,8 +781,10 @@ def get_service_status(self, response: Dict[str, Any]) -> BackupServiceState: # # Ensure this is not containing any information about snapshots, return SUCCESS return self.get_snapshot_status(response) - def get_snapshot_status(self, response: Dict[str, Any]) -> BackupServiceState: + def get_snapshot_status(self, response: Dict[str, Any] | None) -> BackupServiceState: """Returns the snapshot status.""" + if not response: + return BackupServiceState.SNAPSHOT_FAILED_UNKNOWN # Now, check snapshot status: r_str = str(response) if "IN_PROGRESS" in r_str: From ab00d33797e8108f8c5ed96bae4e96eec864b88b Mon Sep 17 00:00:00 2001 From: Pedro Guimaraes Date: Mon, 15 Apr 2024 20:47:34 +0200 Subject: [PATCH 2/3] Fix backup stale message --- lib/charms/opensearch/v0/opensearch_backups.py | 7 +++---- tests/integration/ha/test_backups.py | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/charms/opensearch/v0/opensearch_backups.py b/lib/charms/opensearch/v0/opensearch_backups.py index 44b696fdf..80e5f0476 100644 --- a/lib/charms/opensearch/v0/opensearch_backups.py +++ b/lib/charms/opensearch/v0/opensearch_backups.py @@ -555,6 +555,7 @@ def apply_api_config_if_needed(self) -> None: if state != BackupServiceState.SUCCESS: logger.error(f"Failed to setup backup service with state {state}") self.charm.status.set(BlockedStatus(BackupSetupFailed), app=True) + self.charm.status.clear(BackupConfigureStart) return self.charm.status.clear(BackupSetupFailed, app=True) self.charm.status.clear(BackupConfigureStart) @@ -720,10 +721,8 @@ def _request(self, *args, **kwargs) -> str | None: try: result = self.charm.opensearch.request(*args, **kwargs) except OpenSearchHttpError as e: - if not e.response_text: - return None - return e.response_text - return result + return e.response_body if e.response_body else None + return result if isinstance(result, dict) else None def get_service_status( # noqa: C901 self, response: dict[str, Any] | None diff --git a/tests/integration/ha/test_backups.py b/tests/integration/ha/test_backups.py index b277a2619..e2ff83768 100644 --- a/tests/integration/ha/test_backups.py +++ b/tests/integration/ha/test_backups.py @@ -545,6 +545,7 @@ async def test_wrong_s3_credentials(ops_test: OpsTest) -> None: apps=[app], apps_statuses=["blocked"], units_statuses=["active"], + wait_for_exact_units=3, idle_period=30, ) From d069f442aaede72b033f180bfd2da1b581a39a14 Mon Sep 17 00:00:00 2001 From: phvalguima Date: Tue, 16 Apr 2024 09:22:10 +0200 Subject: [PATCH 3/3] Update opensearch_backups.py --- lib/charms/opensearch/v0/opensearch_backups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/charms/opensearch/v0/opensearch_backups.py b/lib/charms/opensearch/v0/opensearch_backups.py index 80e5f0476..d220d18d3 100644 --- a/lib/charms/opensearch/v0/opensearch_backups.py +++ b/lib/charms/opensearch/v0/opensearch_backups.py @@ -700,7 +700,7 @@ def can_use_s3_repository(self) -> bool: return False return True - def _request(self, *args, **kwargs) -> str | None: + def _request(self, *args, **kwargs) -> dict[str, Any] | None: """Returns the output of OpenSearchDistribution.request() or throws an error. Request method can return one of many: Union[Dict[str, any], List[any], int]