diff --git a/host/embedded-cluster.yaml b/host/embedded-cluster.yaml
new file mode 100644
index 0000000..88741ab
--- /dev/null
+++ b/host/embedded-cluster.yaml
@@ -0,0 +1,350 @@
+# Host support bundle spec for embedded-cluster nodes: hostCollectors gather
+# data from the machine and hostAnalyzers evaluate what was gathered.
+apiVersion: troubleshoot.sh/v1beta2
+kind: SupportBundle
+metadata:
+  name: default
+spec:
+  hostCollectors:
+    # Built-in host facts.
+    - cpu: {}
+    - hostOS: {}
+    - memory: {}
+    - blockDevices: {}
+    - hostServices: {}
+    - ipv4Interfaces: {}
+    - time: {}
+    # Disk usage; these collectorName values are referenced by the diskUsage
+    # analyzers below.
+    - diskUsage:
+        collectorName: root-disk-usage
+        path: /
+    - diskUsage:
+        collectorName: openebs-disk-usage
+        path: /var/openebs/local
+    # k0s and installer state; the k0s-status output is read by the
+    # "Kubernetes API probing" textAnalyze analyzer below.
+    - run:
+        collectorName: k0s-status
+        command: k0s
+        args: ["status"]
+    - run:
+        collectorName: k0s-issue-template
+        command: sh
+        args: ["-c", "uname -srvmo; cat /etc/os-release || lsb_release -a"]
+    - run:
+        collectorName: k0s-sysinfo
+        command: k0s
+        args: ["sysinfo"]
+    - copy:
+        collectorName: installer-logs
+        path: /var/lib/embedded-cluster/logs/*.log
+    # Raw curl output of the local API server health endpoint (verbose form).
+    - run:
+        collectorName: k8s-api-healthz-6443
+        command: curl
+        args: ["-k", "https://localhost:6443/healthz?verbose"]
+    # http collector backing the curl-k8s-api-6443 analyzer below; an http
+    # analyzer needs an http collector with the same collectorName. TLS
+    # verification is skipped to mirror the `curl -k` call above.
+    - http:
+        collectorName: curl-k8s-api-6443
+        get:
+          url: https://localhost:6443/healthz
+          insecureSkipVerify: true
+    # General system state.
+    - run:
+        collectorName: free
+        command: free
+        args: ["-m"]
+    - run:
+        collectorName: top
+        command: top
+        args: ["-b", "-n", "1"]
+    - run:
+        collectorName: uptime
+        command: uptime
+        args: []
+    - run:
+        collectorName: uname
+        command: uname
+        args: ["-a"]
+    - run:
+        collectorName: df
+        command: df
+        args: ["-h"]
+    - run:
+        collectorName: iostat
+        command: iostat
+        args: ["-x"]
+    # SELinux and AppArmor status.
+    - run:
+        collectorName: sestatus
+        command: sestatus
+        args: []
+    - run:
+        collectorName: apparmor-status
+        command: apparmor_status
+        args: []
+    # Firewall and IPVS rules.
+    - run:
+        collectorName: iptables
+        command: iptables
+        args: ["-L", "-v"]
+    - run:
+        collectorName: iptables-version
+        command: iptables
+        args: ["-V"]
+    - run:
+        collectorName: nftables-list
+        command: nft
+        args: ["list", "table", "filter"]
+    - run:
+        collectorName: ipvsadm
+        command: ipvsadm
+        args: ["-l", "-n"]
+    # Block devices and their filesystems.
+    - run:
+        collectorName: lsblk
+        command: lsblk
+        args: ["--fs"]
+    # Listening sockets, routing, name resolution, and kernel parameters.
+    - run:
+        collectorName: netstat-ports
+        command: netstat
+        args: ["-t", "-u", "-l", "-p", "-n"]
+    - run:
+        collectorName: netstat-route-table
+        command: netstat
+        args: ["-r", "-n"]
+    - run:
+        collectorName: resolvectl-status
+        command: resolvectl
+        args: ["status"]
+    - run:
+        collectorName: resolv-conf
+        command: cat
+        args: ["/etc/resolv.conf"]
+    - run:
+        collectorName: systemd-resolved-conf
+        command: cat
+        args: ["/etc/systemd/resolved.conf"]
+    - run:
+        collectorName: nsswitch-conf
+        command: cat
+        args: ["/etc/nsswitch.conf"]
+    - run:
+        collectorName: hosts
+        command: cat
+        args: ["/etc/hosts"]
+    - run:
+        collectorName: ip-route-table
+        command: ip
+        args: ["route"]
+    - run:
+        collectorName: sysctl
+        command: sysctl
+        args: ["-a"]
+    # Gather the hostname from several sources to help troubleshoot scenarios
+    # where the reported hostnames do not match.
+    - run:
+        collectorName: hostnames
+        command: sh
+        args:
+          - "-c"
+          - |
+            echo "hostname = $(hostname)"
+            echo "/proc/sys/kernel/hostname = $(cat /proc/sys/kernel/hostname)"
+            echo "uname -n = $(uname -n)"
+    # Outbound connectivity probes; each collectorName is matched by an http
+    # analyzer of the same name below.
+    - http:
+        collectorName: curl-api-replicated-com
+        get:
+          url: https://api.replicated.com/healthz
+    - http:
+        collectorName: curl-get-replicated-com
+        get:
+          url: https://get.replicated.com/healthz
+    - http:
+        collectorName: curl-registry-replicated-com
+        get:
+          url: https://registry.replicated.com/healthz
+    - http:
+        collectorName: curl-proxy-replicated-com
+        get:
+          url: https://proxy.replicated.com/healthz
+    - http:
+        collectorName: curl-replicated-app
+        get:
+          url: https://replicated.app/healthz
+    # Disk usage, mounts, memory, and process snapshots.
+    - run:
+        collectorName: du-root
+        command: sh
+        args: ["-c", "du -Shax / --exclude /proc | sort -rh | head -20"]
+    - run:
+        collectorName: mount
+        command: mount
+        args: ["-l"]
+    - run:
+        collectorName: vmstat
+        command: vmstat
+        args: ["-w"]
+    # The grep pattern is single-quoted so the shell cannot glob-expand [RD]
+    # against files in the working directory.
+    - run:
+        collectorName: ps-high-load
+        command: sh
+        args: ["-c", "ps -eo s,user,cmd | grep '^[RD]' | sort | uniq -c | sort -nbr | head -20"]
+    - run:
+        collectorName: ps-detect-antivirus-and-security-tools
+        command: sh
+        args: ["-c", "ps -ef | grep -E 'clamav|sophos|esets_daemon|fsav|symantec|mfend|ds_agent|kav|bdagent|s1agent|falcon|illumio|xagt' | grep -v grep"]
+    # Kernel messages from the journal, limited to the last seven days.
+    - run:
+        collectorName: journalctl-dmesg
+        command: journalctl
+        args: ["--dmesg", "--no-pager", "-S", "7 days ago"]
+  hostAnalyzers:
+    - memory:
+        checkName: Amount of Memory
+        outcomes:
+          - warn:
+              when: "< 2G"
+              message: At least 2G of memory is recommended
+          - pass:
+              message: The system has at least 2G of memory
+    - diskUsage:
+        checkName: Root disk usage
+        collectorName: root-disk-usage
+        outcomes:
+          - fail:
+              when: "total < 40Gi"
+              message: The disk containing directory / has less than 40Gi of total space
+          - warn:
+              when: "used/total > 80%"
+              message: The disk containing directory / is more than 80% full
+          - warn:
+              when: "available < 10Gi"
+              message: The disk containing directory / has less than 10Gi of disk space available
+          - pass:
+              message: The disk containing directory / has sufficient space
+    - diskUsage:
+        checkName: OpenEBS disk usage
+        collectorName: openebs-disk-usage
+        outcomes:
+          - fail:
+              when: "total < 40Gi"
+              message: The disk containing OpenEBS volumes has less than 40Gi of space
+          - warn:
+              when: "used/total > 80%"
+              message: The disk containing OpenEBS volumes is more than 80% full
+          - warn:
+              when: "available < 10Gi"
+              message: The disk containing OpenEBS volumes has less than 10Gi of disk space available
+          - pass:
+              message: The disk containing OpenEBS volumes has sufficient space
+    # Matches against the output file of the k0s-status run collector.
+    - textAnalyze:
+        checkName: Kubernetes API probing
+        fileName: host-collectors/run-host/k0s-status.txt
+        regex: 'Kube-api probing successful: true'
+        outcomes:
+          - fail:
+              when: "false"
+              message: Kubernetes API probing is reporting a failure
+          - pass:
+              when: "true"
+              message: Kubernetes API probing is reporting success
+    - time:
+        checkName: ntp-status
+        outcomes:
+          - fail:
+              when: "ntp == unsynchronized+inactive"
+              message: System clock is not synchronized
+          - warn:
+              when: "ntp == unsynchronized+active"
+              message: System clock not yet synchronized
+          - pass:
+              when: "ntp == synchronized+active"
+              message: System clock is synchronized
+          - warn:
+              when: "timezone != UTC"
+              message: Non UTC timezone can interfere with system function
+          - pass:
+              when: "timezone == UTC"
+              message: Timezone is set to UTC
+    # Evaluates the curl-k8s-api-6443 http collector declared in hostCollectors.
+    - http:
+        checkName: curl-k8s-api-6443
+        collectorName: curl-k8s-api-6443
+        outcomes:
+          - warn:
+              when: "error"
+              message: "Unable to curl https://localhost:6443/healthz. Please, run `curl -k https://localhost:6443/healthz` to check further information."
+          - pass:
+              when: "statusCode == 200"
+              message: curl -k https://localhost:6443/healthz returned HTTP CODE response 200.
+          - warn:
+              message: "Unexpected response. HTTP CODE response is not 200. Please, run `curl -ki https://localhost:6443/healthz` to check further information."
+    - http:
+        checkName: curl-api-replicated-com
+        collectorName: curl-api-replicated-com
+        outcomes:
+          - warn:
+              when: "error"
+              message: Error connecting to https://api.replicated.com/healthz
+          - pass:
+              when: "statusCode == 200"
+              message: Connected to https://api.replicated.com/healthz
+          - warn:
+              message: Unexpected response
+    - http:
+        checkName: curl-get-replicated-com
+        collectorName: curl-get-replicated-com
+        outcomes:
+          - warn:
+              when: "error"
+              message: Error connecting to https://get.replicated.com/healthz
+          - pass:
+              when: "statusCode == 200"
+              message: Connected to https://get.replicated.com/healthz
+          - warn:
+              message: Unexpected response
+    - http:
+        checkName: curl-registry-replicated-com
+        collectorName: curl-registry-replicated-com
+        outcomes:
+          - warn:
+              when: "error"
+              message: Error connecting to https://registry.replicated.com/healthz
+          - pass:
+              when: "statusCode == 200"
+              message: Connected to https://registry.replicated.com/healthz
+          - warn:
+              message: Unexpected response
+    - http:
+        checkName: curl-proxy-replicated-com
+        collectorName: curl-proxy-replicated-com
+        outcomes:
+          - warn:
+              when: "error"
+              message: Error connecting to https://proxy.replicated.com/healthz
+          - pass:
+              when: "statusCode == 200"
+              message: Connected to https://proxy.replicated.com/healthz
+          - warn:
+              message: Unexpected response
+    - http:
+        checkName: curl-replicated-app
+        collectorName: curl-replicated-app
+        outcomes:
+          - warn:
+              when: "error"
+              message: Error connecting to https://replicated.app/healthz
+          - pass:
+              when: "statusCode == 200"
+              message: Connected to https://replicated.app/healthz
+          - warn:
+              message: Unexpected response