From 8b4a4fadd8a9f2be5cc4c862adf49bfd44668aa1 Mon Sep 17 00:00:00 2001 From: Smail KOURTA Date: Fri, 18 Oct 2024 15:32:38 +0400 Subject: [PATCH] [DPE-5671] - Logic change in how check if ca rotation in complete in cluster (#486) ## Issue Fixes #482 If a unit finishes updating its CA before any of the other unit start the rotation then this unit will update its certificates and that will cause communication issues with the rest of the nodes ## Solution Check if there's a `tls_ca_renewing` set on any of the units then consider the rotation started. This pull request focuses on enhancing the logic for checking the status of CA (Certificate Authority) rotation in the `opensearch_tls.py` module. The changes aim to improve the accuracy and clarity of determining whether the CA rotation process is complete across all units in the cluster. ### Enhancements to CA Rotation Logic: * **Updated Condition Checks:** - Replaced `is_ca_rotation_ongoing` with `ca_rotation_complete_in_cluster` to ensure the correct status is checked before deferring events and updating TLS resources. (`lib/charms/opensearch/v0/opensearch_tls.py`) [[1]](diffhunk://#diff-fcbdb57c37f5d32b5ecf046327cfa9ee91a33f156714ea7fb6f000a48d2c4ca4L213-R213) [[2]](diffhunk://#diff-fcbdb57c37f5d32b5ecf046327cfa9ee91a33f156714ea7fb6f000a48d2c4ca4L640-R640) * **New Method Implementation:** - Implemented `ca_rotation_complete_in_cluster` to accurately determine if CA rotation is complete by checking flags across all units. This method now tracks both ongoing and completed states more effectively. (`lib/charms/opensearch/v0/opensearch_tls.py`) * **Removed Redundant Method:** - Removed the `is_ca_rotation_ongoing` method as its functionality is now covered by the enhanced `ca_rotation_complete_in_cluster` method. (`lib/charms/opensearch/v0/opensearch_tls.py`) --- lib/charms/opensearch/v0/opensearch_tls.py | 42 +++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/charms/opensearch/v0/opensearch_tls.py b/lib/charms/opensearch/v0/opensearch_tls.py index 017be0ff1..846a14dd0 100644 --- a/lib/charms/opensearch/v0/opensearch_tls.py +++ b/lib/charms/opensearch/v0/opensearch_tls.py @@ -210,7 +210,7 @@ def _on_certificate_available(self, event: CertificateAvailableEvent) -> None: if not self.charm.unit.is_leader() and scope == Scope.APP: return - if self.is_ca_rotation_ongoing(): + if not self.ca_rotation_complete_in_cluster(): event.defer() return @@ -637,7 +637,7 @@ def update_request_ca_bundle(self) -> None: def store_new_tls_resources(self, cert_type: CertType, secrets: Dict[str, Any]): """Add key and cert to keystore.""" - if self.is_ca_rotation_ongoing(): + if not self.ca_rotation_complete_in_cluster(): return cert_name = cert_type.val @@ -860,7 +860,17 @@ def reset_ca_rotation_state(self) -> None: def ca_rotation_complete_in_cluster(self) -> bool: """Check whether the CA rotation completed in all units.""" + rotation_happening = False rotation_complete = True + # check current unit + if self.charm.peers_data.get(Scope.UNIT, "tls_ca_renewing", False): + rotation_happening = True + if not self.charm.peers_data.get(Scope.UNIT, "tls_ca_renewed", False): + logger.debug( + f"TLS CA rotation ongoing in unit: {self.charm.unit.name}, will not update tls certificates." + ) + rotation_complete = False + for relation_type in [ PeerRelationName, PeerClusterRelationName, @@ -868,14 +878,17 @@ def ca_rotation_complete_in_cluster(self) -> bool: ]: for relation in self.model.relations[relation_type]: for unit in relation.units: - if relation.data[unit].get("tls_ca_renewing") and not relation.data[unit].get( - "tls_ca_renewed" - ): - logger.debug(f"TLS CA rotation not complete for unit {unit}.") + if relation.data[unit].get("tls_ca_renewing"): + rotation_happening = True + + if not relation.data[unit].get("tls_ca_renewed"): + logger.debug( + f"TLS CA rotation ongoing in unit {unit}, will not update tls certificates." + ) rotation_complete = False - break - return rotation_complete + # if no unit is renewing the CA, or all of them renewed it, the rotation is complete + return not rotation_happening or rotation_complete def ca_and_certs_rotation_complete_in_cluster(self) -> bool: """Check whether the CA rotation completed in all units.""" @@ -916,19 +929,6 @@ def ca_and_certs_rotation_complete_in_cluster(self) -> bool: break return rotation_complete - def is_ca_rotation_ongoing(self) -> bool: - """Check whether the CA rotation is currently in progress.""" - if ( - self.charm.peers_data.get(Scope.UNIT, "tls_ca_renewing", False) - and not self.charm.peers_data.get(Scope.UNIT, "tls_ca_renewed", False) - or self.charm.peers_data.get(Scope.UNIT, "tls_ca_renewed", False) - and not self.ca_rotation_complete_in_cluster() - ): - logger.debug("TLS CA rotation ongoing, will not update tls certificates.") - return True - - return False - def update_ca_rotation_flag_to_peer_cluster_relation(self, flag: str, operation: str) -> None: """Add or remove a CA rotation flag to all related peer clusters in large deployments.""" for relation_type in [PeerClusterRelationName, PeerClusterOrchestratorRelationName]: