From 0d17eca88167f71f4ac34243f4e04974533f1c11 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Wed, 22 Nov 2023 16:39:15 +1100 Subject: [PATCH 1/6] fix: add missing pmie webhook action configuration functionality Resolves Red Hat issue RHEL-13760 --- README.md | 6 ++++++ defaults/main.yml | 5 +++++ tasks/main.yml | 1 + tests/check_notification.yml | 7 +++++++ tests/tests_verify_notification.yml | 27 +++++++++++++++++++++++++++ 5 files changed, 46 insertions(+) create mode 100644 tests/check_notification.yml create mode 100644 tests/tests_verify_notification.yml diff --git a/README.md b/README.md index 533054ef..27bed4e8 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,12 @@ Example: metrics_monitored_hosts: ["webserver.example.com", "database.example.com"] ``` +### metrics_webhook_endpoint: '' + +Webhook endpoint (URL) where notifications about any automatically detected +performance issues are to be sent. By default, these events are logged to +the local system log only. + ### metrics_retention_days: 14 Retain historical performance data for the specified number of days; after diff --git a/defaults/main.yml b/defaults/main.yml index 6cc8668f..573f475d 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -46,3 +46,8 @@ metrics_manage_firewall: false # If true, manage the pmcd port, pmproxy ports using the selinux role # depending upon the configuration parameters. metrics_manage_selinux: false + +# Webhook endpoint (URL) where notifications about any automatically detected +# performance issues are to be sent. By default, these events are logged to the +# local system log only. +metrics_webhook_endpoint: '' diff --git a/tasks/main.yml b/tasks/main.yml index 9bc942bb..edd0e4ec 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -84,6 +84,7 @@ - name: Setup metric collection service. 
vars: + pcp_pmie_endpoint: "{{ metrics_webhook_endpoint }}" pcp_pmlogger_discard: "{{ metrics_retention_days }}" pcp_target_hosts: "{{ metrics_monitored_hosts }}" pcp_optional_agents: "{{ __metrics_domains }}" diff --git a/tests/check_notification.yml b/tests/check_notification.yml new file mode 100644 index 00000000..f2e4a036 --- /dev/null +++ b/tests/check_notification.yml @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: MIT +--- +# yamllint disable rule:line-length +- name: Check if webhook notification is set + command: grep -E 'webhook_endpoint .* "?{{ __test_webhook }}"?' /var/lib/pcp/config/pmie/config.default + changed_when: false +# yamllint enable rule:line-length diff --git a/tests/tests_verify_notification.yml b/tests/tests_verify_notification.yml new file mode 100644 index 00000000..6b64473a --- /dev/null +++ b/tests/tests_verify_notification.yml @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: MIT +--- +- name: Test configuration of webhook notification + hosts: all + vars: + __test_webhook: https://example.com:4242/endpoint + + roles: + - role: linux-system-roles.metrics + vars: + metrics_webhook_endpoint: "{{ __test_webhook }}" + + pre_tasks: + - name: Save state of services + import_tasks: get_services_state.yml + + tasks: + - name: Check basic services and the webhook endpoint + include_tasks: "{{ item }}" + loop: + - check_pcp.yml + - check_pmie.yml + - check_notification.yml + + post_tasks: + - name: Restore state of services + import_tasks: restore_services_state.yml From 3d86c1c479a3496b9cca2ca8b89f1303582083bb Mon Sep 17 00:00:00 2001 From: Rich Megginson Date: Tue, 5 Dec 2023 13:25:53 -0700 Subject: [PATCH 2/6] skip test on unsupported platforms --- tests/tests_verify_notification.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/tests_verify_notification.yml b/tests/tests_verify_notification.yml index 6b64473a..fe0896e8 100644 --- a/tests/tests_verify_notification.yml +++ b/tests/tests_verify_notification.yml @@ -11,6 +11,12 @@ 
metrics_webhook_endpoint: "{{ __test_webhook }}" pre_tasks: + - name: Skip test if not supported by platform + meta: end_host + when: (ansible_distribution in ['RedHat', 'CentOS'] and + (ansible_facts['distribution_version'] is version('9.3', '<'))) or + ansible_distribution not in ['Fedora', 'RedHat', 'CentOS'] + - name: Save state of services import_tasks: get_services_state.yml From 678a031ad1aa88a69bc61b317ca65c1b030a400f Mon Sep 17 00:00:00 2001 From: Rich Megginson Date: Mon, 11 Dec 2023 13:25:12 -0700 Subject: [PATCH 3/6] fix: fix various test failures --- tests/restore_services_state.yml | 1 + tests/tests_bz1855544.yml | 51 ++++++++++++++++++++------------ tests/tests_default.yml | 16 ++++++++++ 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/tests/restore_services_state.yml b/tests/restore_services_state.yml index 7a27c7f7..56684a2f 100644 --- a/tests/restore_services_state.yml +++ b/tests/restore_services_state.yml @@ -16,6 +16,7 @@ when: - item + '.service' in final_state.ansible_facts.services - item + '.service' in initial_state.ansible_facts.services + - initial_state.ansible_facts.services[item + '.service']['status'] != 'not-found' with_items: - pmcd - pmlogger diff --git a/tests/tests_bz1855544.yml b/tests/tests_bz1855544.yml index 6fc11809..234ef63d 100644 --- a/tests/tests_bz1855544.yml +++ b/tests/tests_bz1855544.yml @@ -4,13 +4,6 @@ Grafana datasources" hosts: all - roles: - - role: linux-system-roles.metrics - vars: - metrics_query_service: true - metrics_graph_service: true - metrics_from_bpftrace: true - pre_tasks: - name: Stop test meta: end_host @@ -22,17 +15,37 @@ import_tasks: get_services_state.yml tasks: - - name: Check if all default datasources are configured - include_tasks: check_default_datasources.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + vars: + metrics_query_service: true + metrics_graph_service: true + metrics_from_bpftrace: true + + - name: 
Check if all default datasources are configured + include_tasks: check_default_datasources.yml - - name: Check if all required PMDAs are installed - command: pmprobe -I pmcd.agent.status - register: status - until: status.stdout.find("bpftrace") != -1 - retries: 10 - delay: 1 - changed_when: false + - name: Check if all required PMDAs are installed + command: pmprobe -I pmcd.agent.status + register: status + until: status.stdout.find("bpftrace") != -1 + retries: 10 + delay: 1 + changed_when: false + rescue: + - name: Check logs + shell: | + journalctl -ex + echo '#####################' + grep type=AVC /var/log/audit/audit.log + changed_when: false - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + - name: Fail test + fail: + msg: "{{ ansible_failed_result }}" + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_default.yml b/tests/tests_default.yml index 806aca5a..4d8a6b4d 100644 --- a/tests/tests_default.yml +++ b/tests/tests_default.yml @@ -5,3 +5,19 @@ roles: - linux-system-roles.metrics + post_tasks: + - name: Debug - restart pmlogger and check failure + changed_when: false + shell: | + if systemctl restart pmlogger.service; then + echo SUCCESS + else + echo '##########################' + systemctl status pmlogger.service + echo '##########################' + journalctl -u pmlogger.service + echo '##########################' + journalctl -ex + echo '##########################' + grep type=AVC /var/log/audit/audit.log + fi From acf9ceff024e9629b93cad1d4834738fffbca9f5 Mon Sep 17 00:00:00 2001 From: Rich Megginson Date: Mon, 11 Dec 2023 16:58:48 -0700 Subject: [PATCH 4/6] block/rescue/always for every test with failure data gathering --- tests/tests_bz1855539.yml | 60 ++++++++++++++--------- tests/tests_bz1855544.yml | 24 +++++---- tests/tests_default.yml | 40 ++++++++------- tests/tests_verify_auth.yml | 56 ++++++++++++--------- tests/tests_verify_basic.yml | 
45 ++++++++++------- tests/tests_verify_bpftrace.yml | 48 +++++++++++------- tests/tests_verify_from_elasticsearch.yml | 31 +++++++----- tests/tests_verify_fullstack.yml | 55 ++++++++++++--------- tests/tests_verify_graph.yml | 39 +++++++++------ tests/tests_verify_into_elasticsearch.yml | 31 ++++++++---- tests/tests_verify_mssql.yml | 39 +++++++++------ tests/tests_verify_notification.yml | 38 ++++++++------ tests/tests_verify_pmie_webhook.yml | 31 ++++++++---- tests/tests_verify_postfix.yml | 43 ++++++++++------ tests/tests_verify_query.yml | 45 ++++++++++------- tests/tests_verify_retention.yml | 42 ++++++++++------ 16 files changed, 410 insertions(+), 257 deletions(-) diff --git a/tests/tests_bz1855539.yml b/tests/tests_bz1855539.yml index 25550e4b..42eb9ce2 100644 --- a/tests/tests_bz1855539.yml +++ b/tests/tests_bz1855539.yml @@ -5,10 +5,7 @@ hosts: all vars: pcp_pmie_control_path: "/etc/pcp/pmie/control.d/" - roles: - - role: linux-system-roles.metrics - vars: - metrics_monitored_hosts: ["127.0.0.2", "127.0.0.3"] + metrics_monitored_hosts: ["127.0.0.2", "127.0.0.3"] pre_tasks: - name: Stop test @@ -21,22 +18,39 @@ import_tasks: get_services_state.yml tasks: - - name: Check if pmie configuration file on remote host is the secondary one - command: |- - grep -E '^\s*\S+\s+n\s+n\s+' {{ pcp_pmie_control_path }}/{{ item }} - loop: "{{ metrics_monitored_hosts }}" - changed_when: false - - - name: Check if pmie configuration file on remote host has the header - vars: - __test_config_path: "{{ pcp_pmie_control_path }}{{ item }}" - include_tasks: check_header.yml - loop: "{{ metrics_monitored_hosts }}" - - - name: Check if pmie configuration file on local host is the primary one - command: grep -E '^\s*\S+\s+y\s+n\s+' {{ pcp_pmie_control_path }}/local - changed_when: false - - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + - name: Run test + block: + - name: Include role + include_role: + name: linux-system-roles.metrics + 
public: true + + - name: >- + Check if pmie configuration file on remote host is the secondary one + command: |- + grep -E '^\s*\S+\s+n\s+n\s+' {{ pcp_pmie_control_path }}/{{ item }} + loop: "{{ metrics_monitored_hosts }}" + changed_when: false + + - name: Check if pmie configuration file on remote host has the header + vars: + __test_config_path: "{{ pcp_pmie_control_path }}{{ item }}" + include_tasks: check_header.yml + loop: "{{ metrics_monitored_hosts }}" + + - name: >- + Check if pmie configuration file on local host is the primary one + command: >- + grep -E '^\s*\S+\s+y\s+n\s+' {{ pcp_pmie_control_path }}/local + changed_when: false + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml + + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_bz1855544.yml b/tests/tests_bz1855544.yml index 234ef63d..8d48b022 100644 --- a/tests/tests_bz1855544.yml +++ b/tests/tests_bz1855544.yml @@ -3,6 +3,10 @@ - name: "Bug 1855544 - metrics role should automate the setup of Grafana datasources" hosts: all + vars: + metrics_query_service: true + metrics_graph_service: true + metrics_from_bpftrace: true pre_tasks: - name: Stop test @@ -20,10 +24,7 @@ - name: Run the role include_role: name: linux-system-roles.metrics - vars: - metrics_query_service: true - metrics_graph_service: true - metrics_from_bpftrace: true + public: true - name: Check if all default datasources are configured include_tasks: check_default_datasources.yml @@ -35,17 +36,14 @@ retries: 10 delay: 1 changed_when: false + + - name: Flush handlers + meta: flush_handlers + rescue: - - name: Check logs - shell: | - journalctl -ex - echo '#####################' - grep type=AVC /var/log/audit/audit.log - changed_when: false + - name: Handle test failure + include_tasks: handle_test_failure.yml - - name: Fail test - fail: - msg: "{{ ansible_failed_result }}" always: - name: 
Restore state of services import_tasks: restore_services_state.yml diff --git a/tests/tests_default.yml b/tests/tests_default.yml index 4d8a6b4d..f6542bb9 100644 --- a/tests/tests_default.yml +++ b/tests/tests_default.yml @@ -3,21 +3,25 @@ - name: Test the role with default params hosts: all - roles: - - linux-system-roles.metrics - post_tasks: - - name: Debug - restart pmlogger and check failure - changed_when: false - shell: | - if systemctl restart pmlogger.service; then - echo SUCCESS - else - echo '##########################' - systemctl status pmlogger.service - echo '##########################' - journalctl -u pmlogger.service - echo '##########################' - journalctl -ex - echo '##########################' - grep type=AVC /var/log/audit/audit.log - fi + pre_tasks: + - name: Save state of services + import_tasks: get_services_state.yml + + tasks: + - name: Run test + block: + - name: Include role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml + + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_auth.yml b/tests/tests_verify_auth.yml index f73022f6..46393151 100644 --- a/tests/tests_verify_auth.yml +++ b/tests/tests_verify_auth.yml @@ -4,15 +4,11 @@ hosts: all vars: __test_uname: pcptest - - roles: - - role: linux-system-roles.metrics - vars: - metrics_username: "{{ __test_uname }}" - metrics_password: "{{ pcptest_pw }}" - # test to install firewall but not selinux - metrics_manage_firewall: true - metrics_manage_selinux: false + metrics_username: "{{ __test_uname }}" + metrics_password: "{{ pcptest_pw }}" + # test to install firewall but not selinux + metrics_manage_firewall: true + metrics_manage_selinux: false pre_tasks: - name: Stop test @@ -25,17 +21,31 @@ import_tasks: get_services_state.yml tasks: - - name: Restart PMCD - # 
noqa command-instead-of-module - shell: systemctl restart pmcd && sleep 5 - changed_when: false - - - name: Check if SASL works - include_tasks: "{{ item }}" - loop: - - check_sasl.yml - - check_firewall_selinux.yml - - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + - name: Run test + block: + - name: Run role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Restart PMCD + # noqa command-instead-of-module + shell: systemctl restart pmcd && sleep 5 + changed_when: false + + - name: Check if SASL works + include_tasks: "{{ item }}" + loop: + - check_sasl.yml + - check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml + + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_basic.yml b/tests/tests_verify_basic.yml index 754b3b77..9f20904b 100644 --- a/tests/tests_verify_basic.yml +++ b/tests/tests_verify_basic.yml @@ -2,27 +2,38 @@ --- - name: Test basic services of the role are installed and running hosts: all - - roles: - - role: linux-system-roles.metrics - vars: - # test to install selinux but not firewall - metrics_manage_firewall: false - metrics_manage_selinux: true + vars: + # test to install selinux but not firewall + metrics_manage_firewall: false + metrics_manage_selinux: true pre_tasks: - name: Save state of services import_tasks: get_services_state.yml tasks: - - name: Check if basic metrics role setup works - include_tasks: "{{ item }}" - loop: - - check_pcp.yml - - check_pmlogger.yml - - check_pmie.yml - - check_firewall_selinux.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check if basic metrics role setup works + include_tasks: "{{ item }}" + loop: + - check_pcp.yml + - check_pmlogger.yml + - check_pmie.yml + - 
check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_bpftrace.yml b/tests/tests_verify_bpftrace.yml index 674e5904..855fd3cd 100644 --- a/tests/tests_verify_bpftrace.yml +++ b/tests/tests_verify_bpftrace.yml @@ -4,16 +4,12 @@ hosts: all vars: __test_uname: pcptest - - roles: - - role: linux-system-roles.metrics - vars: - metrics_from_bpftrace: true - metrics_username: "{{ __test_uname }}" - metrics_password: "{{ pcptest_pw }}" - # test not to install selinux and firewall, explicitly - metrics_manage_firewall: false - metrics_manage_selinux: false + metrics_from_bpftrace: true + metrics_username: "{{ __test_uname }}" + metrics_password: "{{ pcptest_pw }}" + # test not to install selinux and firewall, explicitly + metrics_manage_firewall: false + metrics_manage_selinux: false pre_tasks: - name: Stop test @@ -26,13 +22,27 @@ import_tasks: get_services_state.yml tasks: - - name: Check if BPFTrace & SASL works - include_tasks: "{{ item }}" - loop: - - check_bpftrace.yml - - check_sasl.yml - - check_firewall_selinux.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check if BPFTrace & SASL works + include_tasks: "{{ item }}" + loop: + - check_bpftrace.yml + - check_sasl.yml + - check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_from_elasticsearch.yml 
b/tests/tests_verify_from_elasticsearch.yml index 95046641..4a1967ec 100644 --- a/tests/tests_verify_from_elasticsearch.yml +++ b/tests/tests_verify_from_elasticsearch.yml @@ -2,6 +2,8 @@ --- - name: Test import from Elasticsearch hosts: all + vars: + metrics_from_elasticsearch: true pre_tasks: - name: Stop test @@ -14,16 +16,23 @@ import_tasks: get_services_state.yml tasks: - - name: Run the metrics role to configure Elasticsearch - include_role: - name: linux-system-roles.metrics - public: true - vars: - metrics_from_elasticsearch: true + - name: Run test + block: + - name: Run the metrics role to configure Elasticsearch + include_role: + name: linux-system-roles.metrics + public: true - - name: Check if import from Elasticsearch works - include_tasks: check_from_elasticsearch.yml + - name: Check if import from Elasticsearch works + include_tasks: check_from_elasticsearch.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml + + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_fullstack.yml b/tests/tests_verify_fullstack.yml index fca646e6..009ef418 100644 --- a/tests/tests_verify_fullstack.yml +++ b/tests/tests_verify_fullstack.yml @@ -2,14 +2,11 @@ --- - name: Test the full PCP/Redis/Grafana stack hosts: all - - roles: - - role: linux-system-roles.metrics - vars: - metrics_query_service: true - metrics_graph_service: true - metrics_manage_firewall: true - metrics_manage_selinux: true + vars: + metrics_query_service: true + metrics_graph_service: true + metrics_manage_firewall: true + metrics_manage_selinux: true pre_tasks: - name: Stop test @@ -22,18 +19,32 @@ import_tasks: get_services_state.yml tasks: - - name: Check if all services work - include_tasks: "{{ item }}" - loop: - - check_pcp.yml - - check_pmlogger.yml - - 
check_pmie.yml - - check_redis.yml - - check_pmproxy.yml - - check_grafana.yml - - check_grafanapcp.yml - - check_firewall_selinux.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check if all services work + include_tasks: "{{ item }}" + loop: + - check_pcp.yml + - check_pmlogger.yml + - check_pmie.yml + - check_redis.yml + - check_pmproxy.yml + - check_grafana.yml + - check_grafanapcp.yml + - check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_graph.yml b/tests/tests_verify_graph.yml index 606f28a0..d0c00499 100644 --- a/tests/tests_verify_graph.yml +++ b/tests/tests_verify_graph.yml @@ -2,11 +2,8 @@ --- - name: Test the role with graph service enabled hosts: all - - roles: - - role: linux-system-roles.metrics - vars: - metrics_graph_service: true + vars: + metrics_graph_service: true pre_tasks: - name: Stop test @@ -19,13 +16,27 @@ import_tasks: get_services_state.yml tasks: - - name: Check if Grafana & Grafana-PCP work - include_tasks: "{{ item }}" - loop: - - check_grafana.yml - - check_grafanapcp.yml - - check_firewall_selinux.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check if Grafana & Grafana-PCP work + include_tasks: "{{ item }}" + loop: + - check_grafana.yml + - check_grafanapcp.yml + - check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state 
of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_into_elasticsearch.yml b/tests/tests_verify_into_elasticsearch.yml index e7aa7da1..c8cb3b21 100644 --- a/tests/tests_verify_into_elasticsearch.yml +++ b/tests/tests_verify_into_elasticsearch.yml @@ -2,11 +2,8 @@ --- - name: Test import to Elasticsearch hosts: all - - roles: - - role: linux-system-roles.metrics - vars: - metrics_into_elasticsearch: true + vars: + metrics_into_elasticsearch: true pre_tasks: - name: Stop test @@ -19,9 +16,23 @@ import_tasks: get_services_state.yml tasks: - - name: Check if import to Elasticsearch works - include_tasks: check_into_elasticsearch.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check if import to Elasticsearch works + include_tasks: check_into_elasticsearch.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_mssql.yml b/tests/tests_verify_mssql.yml index 8eeaf248..a4949732 100644 --- a/tests/tests_verify_mssql.yml +++ b/tests/tests_verify_mssql.yml @@ -2,11 +2,8 @@ --- - name: Test installation and configuration of MSSQL agent hosts: all - - roles: - - role: linux-system-roles.metrics - vars: - metrics_from_mssql: true + vars: + metrics_from_mssql: true pre_tasks: - name: Stop test @@ -39,12 +36,26 @@ ternary('ansible.posix.rhel_rpm_ostree', omit) }}" tasks: - - name: Check MSSQL functionality - include_tasks: "{{ item }}" - loop: - - check_mssql.yml - - check_firewall_selinux.yml - - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + - name: Run test + block: + - name: Run the role + include_role: + name: 
linux-system-roles.metrics + public: true + + - name: Check MSSQL functionality + include_tasks: "{{ item }}" + loop: + - check_mssql.yml + - check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml + + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_notification.yml b/tests/tests_verify_notification.yml index fe0896e8..3fbc092c 100644 --- a/tests/tests_verify_notification.yml +++ b/tests/tests_verify_notification.yml @@ -4,11 +4,7 @@ hosts: all vars: __test_webhook: https://example.com:4242/endpoint - - roles: - - role: linux-system-roles.metrics - vars: - metrics_webhook_endpoint: "{{ __test_webhook }}" + metrics_webhook_endpoint: "{{ __test_webhook }}" pre_tasks: - name: Skip test if not supported by platform @@ -21,13 +17,27 @@ import_tasks: get_services_state.yml tasks: - - name: Check basic services and the webhook endpoint - include_tasks: "{{ item }}" - loop: - - check_pcp.yml - - check_pmie.yml - - check_notification.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check basic services and the webhook endpoint + include_tasks: "{{ item }}" + loop: + - check_pcp.yml + - check_pmie.yml + - check_notification.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_pmie_webhook.yml b/tests/tests_verify_pmie_webhook.yml index 926b91d6..ec2bc71f 100644 --- a/tests/tests_verify_pmie_webhook.yml +++ b/tests/tests_verify_pmie_webhook.yml @@ -2,11 +2,8 @@ --- - name: Test pmie webhook configuration hosts: all - - roles: - - 
role: performancecopilot.metrics.pcp - vars: - pcp_pmie_endpoint: https://example.com:12345/webhook + vars: + pcp_pmie_endpoint: https://example.com:12345/webhook pre_tasks: - name: Skip test if not supported by platform @@ -19,9 +16,23 @@ import_tasks: get_services_state.yml tasks: - - name: Check if configuring pmie webhook works - include_tasks: check_pmie_webhook.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check if configuring pmie webhook works + include_tasks: check_pmie_webhook.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_postfix.yml b/tests/tests_verify_postfix.yml index f421a28b..443a7e4d 100644 --- a/tests/tests_verify_postfix.yml +++ b/tests/tests_verify_postfix.yml @@ -2,11 +2,8 @@ --- - name: Test installation and configuration of Postfix agent hosts: all - - roles: - - role: linux-system-roles.metrics - vars: - metrics_from_postfix: true + vars: + metrics_from_postfix: true pre_tasks: - name: Stop test @@ -51,16 +48,30 @@ mode: 0600 tasks: - - name: Signal pmcd to ensure above path used by pmdapostfix - command: pmstore pmcd.control.sighup 1 - changed_when: false + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Signal pmcd to ensure above path used by pmdapostfix + command: pmstore pmcd.control.sighup 1 + changed_when: false + + - name: Check Postfix functionality + include_tasks: "{{ item }}" + loop: + - check_postfix.yml + - check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers - - name: Check Postfix functionality - include_tasks: "{{ item }}" - loop: - - 
check_postfix.yml - - check_firewall_selinux.yml + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_query.yml b/tests/tests_verify_query.yml index d5226ddf..37847e5d 100644 --- a/tests/tests_verify_query.yml +++ b/tests/tests_verify_query.yml @@ -2,11 +2,8 @@ --- - name: Test the role with query service enabled hosts: all - - roles: - - role: linux-system-roles.metrics - vars: - metrics_query_service: true + vars: + metrics_query_service: true pre_tasks: - name: End test @@ -19,16 +16,30 @@ import_tasks: get_services_state.yml tasks: - - name: Check all the required services - include_tasks: "{{ item }}" - loop: - - check_pcp.yml - - check_pmlogger.yml - - check_pmie.yml - - check_redis.yml - - check_pmproxy.yml - - check_firewall_selinux.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check all the required services + include_tasks: "{{ item }}" + loop: + - check_pcp.yml + - check_pmlogger.yml + - check_pmie.yml + - check_redis.yml + - check_pmproxy.yml + - check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml diff --git a/tests/tests_verify_retention.yml b/tests/tests_verify_retention.yml index 663713ca..eca06f69 100644 --- a/tests/tests_verify_retention.yml +++ b/tests/tests_verify_retention.yml @@ -4,26 +4,36 @@ hosts: all vars: __test_retention: 137 - - roles: - - role: linux-system-roles.metrics - vars: - metrics_retention_days: "{{ __test_retention }}" 
+ metrics_retention_days: "{{ __test_retention }}" pre_tasks: - name: Save state of services import_tasks: get_services_state.yml tasks: - - name: Check basic services and the retention - include_tasks: "{{ item }}" - loop: - - check_pcp.yml - - check_pmlogger.yml - - check_pmie.yml - - check_retention.yml - - check_firewall_selinux.yml + - name: Run test + block: + - name: Run the role + include_role: + name: linux-system-roles.metrics + public: true + + - name: Check basic services and the retention + include_tasks: "{{ item }}" + loop: + - check_pcp.yml + - check_pmlogger.yml + - check_pmie.yml + - check_retention.yml + - check_firewall_selinux.yml + + - name: Flush handlers + meta: flush_handlers + + rescue: + - name: Handle failure case + include_tasks: handle_test_failure.yml - post_tasks: - - name: Restore state of services - import_tasks: restore_services_state.yml + always: + - name: Restore state of services + import_tasks: restore_services_state.yml From 99586c4aec29d91bf98b8a7d2736621682181cb9 Mon Sep 17 00:00:00 2001 From: Rich Megginson Date: Mon, 11 Dec 2023 17:00:39 -0700 Subject: [PATCH 5/6] more --- tests/handle_test_failure.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/handle_test_failure.yml diff --git a/tests/handle_test_failure.yml b/tests/handle_test_failure.yml new file mode 100644 index 00000000..6d03dd5b --- /dev/null +++ b/tests/handle_test_failure.yml @@ -0,0 +1,11 @@ +--- +- name: Collect logs + shell: | + journalctl -ex + echo '##################' + grep type=AVC /var/log/audit/audit.log + changed_when: false + +- name: Reraise error + fail: + msg: "{{ ansible_failed_result }}" From cbb3073c4da98eb094ff04b8399380fb5f1ad9b7 Mon Sep 17 00:00:00 2001 From: Rich Megginson Date: Tue, 12 Dec 2023 07:31:24 -0700 Subject: [PATCH 6/6] more-debugging --- tests/handle_test_failure.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/handle_test_failure.yml b/tests/handle_test_failure.yml index 
6d03dd5b..9ddb975b 100644 --- a/tests/handle_test_failure.yml +++ b/tests/handle_test_failure.yml @@ -3,7 +3,15 @@ shell: | journalctl -ex echo '##################' + echo List of SELinux AVCs - note list may be empty grep type=AVC /var/log/audit/audit.log + echo '##################' + ls -alrtF /run + if [ -d /run/pcp ]; then + ls -alrtF /run/pcp + else + echo ERROR - /run/pcp does not exist + fi changed_when: false - name: Reraise error