From 78bb5ae9fd42db42fb70b67f28d3afb939c1f65a Mon Sep 17 00:00:00 2001 From: Oluwatobi Bamidele Date: Wed, 18 Dec 2024 15:20:58 +0100 Subject: [PATCH 1/7] chore: setup grafana and prometheus --- superadmin.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/superadmin.yml b/superadmin.yml index 0ceebab1..3631d1b8 100644 --- a/superadmin.yml +++ b/superadmin.yml @@ -95,7 +95,39 @@ services: - AWS_USER_ROLE=$AWS_USER_ROLE - GITHUB_REQUEST_TOKEN=$GITHUB_REQUEST_TOKEN - SINGLE_AUDIO_OR_VIDEO_EPISODE_WORKFLOW_ID=$SINGLE_AUDIO_OR_VIDEO_EPISODE_WORKFLOW_ID + prometheus: + image: prom/prometheus + container_name: prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + ports: + - 9090:9090 + restart: unless-stopped + volumes: + - ./prometheus:/etc/prometheus + - prom_data:/prometheus + grafana: + image: grafana/grafana + container_name: grafana + ports: + - 3000:3000 + restart: unless-stopped + labels: + - "traefik.enable=true" + - "traefik.http.routers.elements.rule=Host(`grafana.${HOST}`)" + - "traefik.http.services.elements.loadbalancer.server.port=3000" + - "traefik.http.routers.elements.tls=true" + - "traefik.http.routers.elements.tls.certresolver=myresolver" + - "traefik.http.routers.elements.entrypoints=websecure" + environment: + - GF_SECURITY_ADMIN_USER=$GF_SECURITY_ADMIN_USER + - GF_SECURITY_ADMIN_PASSWORD=$GF_SECURITY_ADMIN_PASSWORD + volumes: + - ./grafana:/etc/grafana/provisioning/datasources networks: sphinx-swarm: external: true + +volumes: + prom_data: From 3fb8c678fff679b30138cd55b85fb0d4028249bd Mon Sep 17 00:00:00 2001 From: Oluwatobi Bamidele Date: Wed, 18 Dec 2024 16:19:19 +0100 Subject: [PATCH 2/7] chore: remove the labels --- superadmin.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/superadmin.yml b/superadmin.yml index 3631d1b8..b8c8f211 100644 --- a/superadmin.yml +++ b/superadmin.yml @@ -112,13 +112,13 @@ services: ports: - 3000:3000 restart: unless-stopped 
- labels: - - "traefik.enable=true" - - "traefik.http.routers.elements.rule=Host(`grafana.${HOST}`)" - - "traefik.http.services.elements.loadbalancer.server.port=3000" - - "traefik.http.routers.elements.tls=true" - - "traefik.http.routers.elements.tls.certresolver=myresolver" - - "traefik.http.routers.elements.entrypoints=websecure" + # labels: + # - "traefik.enable=true" + # - "traefik.http.routers.elements.rule=Host(`grafana.${HOST}`)" + # - "traefik.http.services.elements.loadbalancer.server.port=3000" + # - "traefik.http.routers.elements.tls=true" + # - "traefik.http.routers.elements.tls.certresolver=myresolver" + # - "traefik.http.routers.elements.entrypoints=websecure" environment: - GF_SECURITY_ADMIN_USER=$GF_SECURITY_ADMIN_USER - GF_SECURITY_ADMIN_PASSWORD=$GF_SECURITY_ADMIN_PASSWORD From d095d363fb0a832c5300b41c8dad1217b93a6490 Mon Sep 17 00:00:00 2001 From: Oluwatobi Bamidele Date: Wed, 18 Dec 2024 16:41:20 +0100 Subject: [PATCH 3/7] chore: add network to grafana --- superadmin.yml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/superadmin.yml b/superadmin.yml index b8c8f211..88293e60 100644 --- a/superadmin.yml +++ b/superadmin.yml @@ -98,6 +98,8 @@ services: prometheus: image: prom/prometheus container_name: prometheus + networks: + - sphinx-swarm command: - '--config.file=/etc/prometheus/prometheus.yml' ports: @@ -109,16 +111,18 @@ services: grafana: image: grafana/grafana container_name: grafana + networks: + - sphinx-swarm ports: - 3000:3000 restart: unless-stopped - # labels: - # - "traefik.enable=true" - # - "traefik.http.routers.elements.rule=Host(`grafana.${HOST}`)" - # - "traefik.http.services.elements.loadbalancer.server.port=3000" - # - "traefik.http.routers.elements.tls=true" - # - "traefik.http.routers.elements.tls.certresolver=myresolver" - # - "traefik.http.routers.elements.entrypoints=websecure" + labels: + - "traefik.enable=true" + - "traefik.http.routers.elements.rule=Host(`grafana.${HOST}`)" + - 
"traefik.http.services.elements.loadbalancer.server.port=3000" + - "traefik.http.routers.elements.tls=true" + - "traefik.http.routers.elements.tls.certresolver=myresolver" + - "traefik.http.routers.elements.entrypoints=websecure" environment: - GF_SECURITY_ADMIN_USER=$GF_SECURITY_ADMIN_USER - GF_SECURITY_ADMIN_PASSWORD=$GF_SECURITY_ADMIN_PASSWORD From 0d0cd52e8f0cfa2efee1547b5cf936d281bb97b5 Mon Sep 17 00:00:00 2001 From: Oluwatobi Bamidele Date: Wed, 18 Dec 2024 16:49:24 +0100 Subject: [PATCH 4/7] fix: use container name --- superadmin.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/superadmin.yml b/superadmin.yml index 88293e60..6a2e62fa 100644 --- a/superadmin.yml +++ b/superadmin.yml @@ -118,11 +118,11 @@ services: restart: unless-stopped labels: - "traefik.enable=true" - - "traefik.http.routers.elements.rule=Host(`grafana.${HOST}`)" - - "traefik.http.services.elements.loadbalancer.server.port=3000" - - "traefik.http.routers.elements.tls=true" - - "traefik.http.routers.elements.tls.certresolver=myresolver" - - "traefik.http.routers.elements.entrypoints=websecure" + - "traefik.http.routers.grafana.rule=Host(`grafana.${HOST}`)" + - "traefik.http.services.grafana.loadbalancer.server.port=3000" + - "traefik.http.routers.grafana.tls=true" + - "traefik.http.routers.grafana.tls.certresolver=myresolver" + - "traefik.http.routers.grafana.entrypoints=websecure" environment: - GF_SECURITY_ADMIN_USER=$GF_SECURITY_ADMIN_USER - GF_SECURITY_ADMIN_PASSWORD=$GF_SECURITY_ADMIN_PASSWORD From 32cf6e04c97ab93b529a57bdbd43409e7b837ccb Mon Sep 17 00:00:00 2001 From: Oluwatobi Bamidele Date: Wed, 18 Dec 2024 17:03:02 +0100 Subject: [PATCH 5/7] chore: make prometheus accessible from traefik --- superadmin.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/superadmin.yml b/superadmin.yml index 6a2e62fa..30596e0b 100644 --- a/superadmin.yml +++ b/superadmin.yml @@ -105,6 +105,13 @@ services: ports: - 9090:9090 restart: unless-stopped + labels: + 
- "traefik.enable=true" + - "traefik.http.routers.prometheus.rule=Host(`prometheus.${HOST}`)" + - "traefik.http.services.prometheus.loadbalancer.server.port=9090" + - "traefik.http.routers.prometheus.tls=true" + - "traefik.http.routers.prometheus.tls.certresolver=myresolver" + - "traefik.http.routers.prometheus.entrypoints=websecure" volumes: - ./prometheus:/etc/prometheus - prom_data:/prometheus From 88c2e64596d6c0694250b6e68db884d100346890 Mon Sep 17 00:00:00 2001 From: Oluwatobi Bamidele Date: Wed, 18 Dec 2024 17:10:43 +0100 Subject: [PATCH 6/7] chore: added node exporter to second brain docker compose --- second-brain.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/second-brain.yml b/second-brain.yml index e400ee12..a2aac122 100644 --- a/second-brain.yml +++ b/second-brain.yml @@ -104,6 +104,16 @@ services: - AWS_S3_BUCKET_NAME=$AWS_S3_BUCKET_NAME - GITHUB_REQUEST_TOKEN=$GITHUB_REQUEST_TOKEN - SINGLE_AUDIO_OR_VIDEO_EPISODE_WORKFLOW_ID=$SINGLE_AUDIO_OR_VIDEO_EPISODE_WORKFLOW_ID + node_exporter: + image: quay.io/prometheus/node-exporter:latest + container_name: node_exporter + command: + - '--path.rootfs=/host' + network_mode: host + pid: host + restart: unless-stopped + volumes: + - '/:/host:ro,rslave' networks: sphinx-swarm: From 423c6d7511e2f9a66491b77ef5e8437618a12120 Mon Sep 17 00:00:00 2001 From: Oluwatobi Bamidele Date: Wed, 18 Dec 2024 21:36:02 +0100 Subject: [PATCH 7/7] prometheus config created --- prometheus/prometheus.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 prometheus/prometheus.yml diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml new file mode 100644 index 00000000..401a5e8d --- /dev/null +++ b/prometheus/prometheus.yml @@ -0,0 +1,24 @@ +global: + scrape_interval: 10s + +scrape_configs: + - job_name: 'swarms' + ec2_sd_configs: + - region: us-east-1 + port: 9100 + scheme: http # Ensure Prometheus uses HTTP + relabel_configs: + # Keep only the EC2 instances with the tag 
Swarm set to superadmin + - source_labels: [__meta_ec2_tag_Swarm] + regex: superadmin + action: keep + + # Extract the public IP from EC2 metadata and assign it to __address__ + - source_labels: [__meta_ec2_public_ip] + regex: (.*) + target_label: __address__ + replacement: "${1}:9100" + + # Label the instances with their name and availability zone + - source_labels: [__meta_ec2_tag_Name, __meta_ec2_availability_zone] + target_label: instance \ No newline at end of file