Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add missing pmie webhook action configuration functionality #183

Merged
merged 6 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ Example:
metrics_monitored_hosts: ["webserver.example.com", "database.example.com"]
```

### metrics_webhook_endpoint: ''

Webhook endpoint (URL) where notifications about any automatically detected
performance issues are to be sent. By default, these events are logged to
the local system log only.

### metrics_retention_days: 14

Retain historical performance data for the specified number of days; after
Expand Down
5 changes: 5 additions & 0 deletions defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,8 @@ metrics_manage_firewall: false
# If true, manage the pmcd port, pmproxy ports using the selinux role
# depending upon the configuration parameters.
metrics_manage_selinux: false

# Webhook endpoint (URL) where notifications about any automatically detected
# performance issues are to be sent. By default, these events are logged to the
# local system log only.
metrics_webhook_endpoint: ''
1 change: 1 addition & 0 deletions tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@

- name: Setup metric collection service.
vars:
pcp_pmie_endpoint: "{{ metrics_webhook_endpoint }}"
pcp_pmlogger_discard: "{{ metrics_retention_days }}"
pcp_target_hosts: "{{ metrics_monitored_hosts }}"
pcp_optional_agents: "{{ __metrics_domains }}"
Expand Down
7 changes: 7 additions & 0 deletions tests/check_notification.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-License-Identifier: MIT
---
# yamllint disable rule:line-length
# Verify that the pmie default configuration file contains a
# "webhook_endpoint" line whose value is the expected URL, optionally
# wrapped in double quotes. The including test must define __test_webhook.
# grep exits non-zero when no line matches, which fails this task.
# NOTE(review): __test_webhook is interpolated into the regex unescaped, so
# characters such as "." in the URL act as regex metacharacters.
- name: Check if webhook notification is set
command: grep -E 'webhook_endpoint .* "?{{ __test_webhook }}"?' /var/lib/pcp/config/pmie/config.default
# Read-only check: never report this task as "changed".
changed_when: false
# yamllint enable rule:line-length
19 changes: 19 additions & 0 deletions tests/handle_test_failure.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
# Diagnostic tasks included by the tests' "rescue" sections when a test
# block fails.
#
# "Collect logs" prints, in order: the journal (journalctl -ex), any SELinux
# AVC records found in /var/log/audit/audit.log, a listing of /run, and a
# listing of /run/pcp (or an error line if that directory does not exist).
# The task is read-only, so it is never reported as "changed".
- name: Collect logs
shell: |
journalctl -ex
echo '##################'
echo List of SELinux AVCs - note list may be empty
grep type=AVC /var/log/audit/audit.log
echo '##################'
ls -alrtF /run
if [ -d /run/pcp ]; then
ls -alrtF /run/pcp
else
echo ERROR - /run/pcp does not exist
fi
changed_when: false

# Fail again after collecting diagnostics so the test run is still reported
# as failed; the message is the content of ansible_failed_result.
- name: Reraise error
fail:
msg: "{{ ansible_failed_result }}"
1 change: 1 addition & 0 deletions tests/restore_services_state.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
when:
- item + '.service' in final_state.ansible_facts.services
- item + '.service' in initial_state.ansible_facts.services
- initial_state.ansible_facts.services[item + '.service']['status'] != 'not-found'
with_items:
- pmcd
- pmlogger
Expand Down
60 changes: 37 additions & 23 deletions tests/tests_bz1855539.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
hosts: all
vars:
pcp_pmie_control_path: "/etc/pcp/pmie/control.d/"
roles:
- role: linux-system-roles.metrics
vars:
metrics_monitored_hosts: ["127.0.0.2", "127.0.0.3"]
metrics_monitored_hosts: ["127.0.0.2", "127.0.0.3"]

pre_tasks:
- name: Stop test
Expand All @@ -21,22 +18,39 @@
import_tasks: get_services_state.yml

tasks:
- name: Check if pmie configuration file on remote host is the secondary one
command: |-
grep -E '^\s*\S+\s+n\s+n\s+' {{ pcp_pmie_control_path }}/{{ item }}
loop: "{{ metrics_monitored_hosts }}"
changed_when: false

- name: Check if pmie configuration file on remote host has the header
vars:
__test_config_path: "{{ pcp_pmie_control_path }}{{ item }}"
include_tasks: check_header.yml
loop: "{{ metrics_monitored_hosts }}"

- name: Check if pmie configuration file on local host is the primary one
command: grep -E '^\s*\S+\s+y\s+n\s+' {{ pcp_pmie_control_path }}/local
changed_when: false

post_tasks:
- name: Restore state of services
import_tasks: restore_services_state.yml
- name: Run test
block:
- name: Include role
include_role:
name: linux-system-roles.metrics
public: true

- name: >-
Check if pmie configuration file on remote host is the secondary one
command: |-
grep -E '^\s*\S+\s+n\s+n\s+' {{ pcp_pmie_control_path }}/{{ item }}
loop: "{{ metrics_monitored_hosts }}"
changed_when: false

- name: Check if pmie configuration file on remote host has the header
vars:
__test_config_path: "{{ pcp_pmie_control_path }}{{ item }}"
include_tasks: check_header.yml
loop: "{{ metrics_monitored_hosts }}"

- name: >-
Check if pmie configuration file on local host is the primary one
command: >-
grep -E '^\s*\S+\s+y\s+n\s+' {{ pcp_pmie_control_path }}/local
changed_when: false

- name: Flush handlers
meta: flush_handlers

rescue:
- name: Handle failure case
include_tasks: handle_test_failure.yml

always:
- name: Restore state of services
import_tasks: restore_services_state.yml
53 changes: 32 additions & 21 deletions tests/tests_bz1855544.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@
- name: "Bug 1855544 - metrics role should automate the setup of
Grafana datasources"
hosts: all

roles:
- role: linux-system-roles.metrics
vars:
metrics_query_service: true
metrics_graph_service: true
metrics_from_bpftrace: true
vars:
metrics_query_service: true
metrics_graph_service: true
metrics_from_bpftrace: true

pre_tasks:
- name: Stop test
Expand All @@ -22,17 +19,31 @@
import_tasks: get_services_state.yml

tasks:
- name: Check if all default datasources are configured
include_tasks: check_default_datasources.yml

- name: Check if all required PMDAs are installed
command: pmprobe -I pmcd.agent.status
register: status
until: status.stdout.find("bpftrace") != -1
retries: 10
delay: 1
changed_when: false

post_tasks:
- name: Restore state of services
import_tasks: restore_services_state.yml
- name: Run test
block:
- name: Run the role
include_role:
name: linux-system-roles.metrics
public: true

- name: Check if all default datasources are configured
include_tasks: check_default_datasources.yml

- name: Check if all required PMDAs are installed
command: pmprobe -I pmcd.agent.status
register: status
until: status.stdout.find("bpftrace") != -1
retries: 10
delay: 1
changed_when: false

- name: Flush handlers
meta: flush_handlers

rescue:
- name: Handle test failure
include_tasks: handle_test_failure.yml

always:
- name: Restore state of services
import_tasks: restore_services_state.yml
24 changes: 22 additions & 2 deletions tests/tests_default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,25 @@
- name: Test the role with default params
hosts: all

roles:
- linux-system-roles.metrics
pre_tasks:
- name: Save state of services
import_tasks: get_services_state.yml

tasks:
- name: Run test
block:
- name: Include role
include_role:
name: linux-system-roles.metrics
public: true

- name: Flush handlers
meta: flush_handlers

rescue:
- name: Handle failure case
include_tasks: handle_test_failure.yml

always:
- name: Restore state of services
import_tasks: restore_services_state.yml
56 changes: 33 additions & 23 deletions tests/tests_verify_auth.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@
hosts: all
vars:
__test_uname: pcptest

roles:
- role: linux-system-roles.metrics
vars:
metrics_username: "{{ __test_uname }}"
metrics_password: "{{ pcptest_pw }}"
# test to install firewall but not selinux
metrics_manage_firewall: true
metrics_manage_selinux: false
metrics_username: "{{ __test_uname }}"
metrics_password: "{{ pcptest_pw }}"
# test to install firewall but not selinux
metrics_manage_firewall: true
metrics_manage_selinux: false

pre_tasks:
- name: Stop test
Expand All @@ -25,17 +21,31 @@
import_tasks: get_services_state.yml

tasks:
- name: Restart PMCD
# noqa command-instead-of-module
shell: systemctl restart pmcd && sleep 5
changed_when: false

- name: Check if SASL works
include_tasks: "{{ item }}"
loop:
- check_sasl.yml
- check_firewall_selinux.yml

post_tasks:
- name: Restore state of services
import_tasks: restore_services_state.yml
- name: Run test
block:
- name: Run role
include_role:
name: linux-system-roles.metrics
public: true

- name: Restart PMCD
# noqa command-instead-of-module
shell: systemctl restart pmcd && sleep 5
changed_when: false

- name: Check if SASL works
include_tasks: "{{ item }}"
loop:
- check_sasl.yml
- check_firewall_selinux.yml

- name: Flush handlers
meta: flush_handlers

rescue:
- name: Handle failure case
include_tasks: handle_test_failure.yml

always:
- name: Restore state of services
import_tasks: restore_services_state.yml
45 changes: 28 additions & 17 deletions tests/tests_verify_basic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,38 @@
---
- name: Test basic services of the role are installed and running
hosts: all

roles:
- role: linux-system-roles.metrics
vars:
# test to install selinux but not firewall
metrics_manage_firewall: false
metrics_manage_selinux: true
vars:
# test to install selinux but not firewall
metrics_manage_firewall: false
metrics_manage_selinux: true

pre_tasks:
- name: Save state of services
import_tasks: get_services_state.yml

tasks:
- name: Check if basic metrics role setup works
include_tasks: "{{ item }}"
loop:
- check_pcp.yml
- check_pmlogger.yml
- check_pmie.yml
- check_firewall_selinux.yml
- name: Run test
block:
- name: Run the role
include_role:
name: linux-system-roles.metrics
public: true

- name: Check if basic metrics role setup works
include_tasks: "{{ item }}"
loop:
- check_pcp.yml
- check_pmlogger.yml
- check_pmie.yml
- check_firewall_selinux.yml

- name: Flush handlers
meta: flush_handlers

rescue:
- name: Handle failure case
include_tasks: handle_test_failure.yml

post_tasks:
- name: Restore state of services
import_tasks: restore_services_state.yml
always:
- name: Restore state of services
import_tasks: restore_services_state.yml
Loading
Loading