From f8eff5f31e147bcd05295d9d472d4352784e6ef5 Mon Sep 17 00:00:00 2001 From: JoJo Date: Fri, 17 May 2024 14:23:02 -0400 Subject: [PATCH] Send database instance metadata prior to check queries (#17590) * Send database instance metadata prior to check queries --- postgres/changelog.d/17590.fixed | 1 + postgres/datadog_checks/postgres/postgres.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 postgres/changelog.d/17590.fixed diff --git a/postgres/changelog.d/17590.fixed b/postgres/changelog.d/17590.fixed new file mode 100644 index 0000000000000..57ad924bfaa58 --- /dev/null +++ b/postgres/changelog.d/17590.fixed @@ -0,0 +1 @@ +Send database instance metadata prior to check queries. This prevents a scenario where exceptions thrown during check execution can cause the database_instance resource to not be emitted. When the resource is not emitted, this can cause flapping in host tags. For example, a customer might see dbms:N/A for a period of time until a new database_instance resource is created. Moving this means it will always be sent, even if an unexpected exception is thrown during check execution. 
diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index d321b20e32f85..4d251adcc9a22 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -1012,7 +1012,7 @@ def debug_stats_kwargs(self, tags=None): def check(self, _): tags = copy.copy(self.tags) self.tags_without_db = [t for t in copy.copy(self.tags) if not t.startswith("db:")] - # Collect metrics + tags_to_add = [] try: # Check version self._connect() @@ -1021,17 +1021,18 @@ def check(self, _): # Add raw version as a tag tags.append(f'postgresql_version:{self.raw_version}') - self.tags_without_db.append(f'postgresql_version:{self.raw_version}') + tags_to_add.append(f'postgresql_version:{self.raw_version}') # Add system identifier as a tag self.load_system_identifier() tags.append(f'system_identifier:{self.system_identifier}') - self.tags_without_db.append(f'system_identifier:{self.system_identifier}') - + tags_to_add.append(f'system_identifier:{self.system_identifier}') if self._config.tag_replication_role: replication_role_tag = "replication_role:{}".format(self._get_replication_role()) tags.append(replication_role_tag) - self.tags_without_db.append(replication_role_tag) + tags_to_add.append(replication_role_tag) + self._update_tag_sets(tags_to_add) + self._send_database_instance_metadata() self.log.debug("Running check against version %s: is_aurora: %s", str(self.version), str(self.is_aurora)) self._emit_running_metric() @@ -1044,7 +1045,6 @@ def check(self, _): if self._config.collect_wal_metrics: # collect wal metrics for pg < 10, disabled by enabled self._collect_wal_metrics() - self._send_database_instance_metadata() except Exception as e: self.log.exception("Unable to collect postgres metrics.") self._clean_state() @@ -1069,3 +1069,7 @@ def check(self, _): finally: # Add the warnings saved during the execution of the check self._report_warnings() + + def _update_tag_sets(self, tags): + 
self._non_internal_tags = list(set(self._non_internal_tags) | set(tags)) + self.tags_without_db = list(set(self.tags_without_db) | set(tags))