From a43bf79b485ce4fe09eb5362b159b5fd664336f8 Mon Sep 17 00:00:00 2001
From: v-zhuravlev
Date: Tue, 21 May 2024 23:34:49 +0800
Subject: [PATCH] Backport post-setup healthcheck from agent to alloy

Notifying "Restart alloy" now also runs a health check
(tasks/ga-started.yml) after the service restart. If the endpoint does
not answer with HTTP 200 within the retries, the most recent journal
entries of the service are printed and the play fails. Handlers are
flushed in tasks/service.yml before the service is enabled.

---
 roles/alloy/handlers/main.yml    |  6 ++++++
 roles/alloy/tasks/ga-started.yml | 35 +++++++++++++++++++++++++++++++++++
 roles/alloy/tasks/service.yml    |  5 ++++-
 roles/alloy/vars/main.yml        |  2 ++
 4 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 roles/alloy/tasks/ga-started.yml
 create mode 100644 roles/alloy/vars/main.yml

diff --git a/roles/alloy/handlers/main.yml b/roles/alloy/handlers/main.yml
index 40d72dec..2c30728c 100644
--- a/roles/alloy/handlers/main.yml
+++ b/roles/alloy/handlers/main.yml
@@ -3,3 +3,9 @@
     name: "{{ service_name }}"
     state: restarted
   become: true
+  listen: "Restart alloy"
+
+# Runs after the restart above: handlers execute in definition order.
+- name: Check alloy is started properly
+  ansible.builtin.include_tasks: ga-started.yml
+  listen: "Restart alloy"
diff --git a/roles/alloy/tasks/ga-started.yml b/roles/alloy/tasks/ga-started.yml
new file mode 100644
index 00000000..d535d816
--- /dev/null
+++ b/roles/alloy/tasks/ga-started.yml
@@ -0,0 +1,35 @@
+---
+# Post-restart health check: poll the Alloy health check endpoint and, if it
+# never returns HTTP 200, print recent service logs and fail the play.
+- name: Health check Grafana Alloy
+  ansible.builtin.uri:
+    url: "{{ _alloy_healthcheck_endpoint }}"
+    follow_redirects: none
+    method: GET
+  register: _result
+  # Never fail here; the block below reports the failure together with logs.
+  failed_when: false
+  # `default(0)` keeps the condition evaluable if the result has no status.
+  until: _result.status | default(0) == 200
+  retries: 3
+  delay: 5
+  changed_when: false
+  when: not ansible_check_mode
+
+- name: Check system logs if Grafana Alloy is not started
+  when: not ansible_check_mode and _result.status | default(0) != 200
+  block:
+    - name: Run journalctl
+      # No shell features are needed, so use `command` rather than `shell`.
+      ansible.builtin.command:
+        cmd: "journalctl -u {{ service_name }} -b -n20 --no-pager"
+      register: journal_ret
+      changed_when: false
+      # Log collection is best-effort; always reach the failure task below.
+      failed_when: false
+    - name: Output Grafana Alloy logs
+      ansible.builtin.debug:
+        var: journal_ret.stdout_lines
+    - name: Raise alerts
+      ansible.builtin.fail:
+        msg: "Service {{ service_name }} hasn't started."
diff --git a/roles/alloy/tasks/service.yml b/roles/alloy/tasks/service.yml
index 0d5d42af..e4e87f12 100644
--- a/roles/alloy/tasks/service.yml
+++ b/roles/alloy/tasks/service.yml
@@ -2,7 +2,7 @@
   ansible.builtin.template:
     src: alloy.service.j2
     dest: /etc/systemd/system/{{ service_name }}.service
-    mode: '0644'
+    mode: "0644"
   become: true
   notify: Restart alloy
 
@@ -11,6 +11,9 @@
     daemon_reload: yes
   become: true
 
+- name: Flush handlers
+  ansible.builtin.meta: flush_handlers
+
 - name: Ensure alloy service is enabled and running
   ansible.builtin.service:
     name: "{{ service_name }}"
diff --git a/roles/alloy/vars/main.yml b/roles/alloy/vars/main.yml
new file mode 100644
index 00000000..ab47eb2c
--- /dev/null
+++ b/roles/alloy/vars/main.yml
@@ -0,0 +1,2 @@
+# Health check URL used after start: the 'server.http.listen-addr' entry of alloy_flags_extra when set, else 127.0.0.1:12345.
+_alloy_healthcheck_endpoint: "http://{{ alloy_flags_extra['server.http.listen-addr'] | default('127.0.0.1:12345') }}/-/ready"