Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embedded Cluster Host Bundle #92

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 320 additions & 0 deletions host/embedded-cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
# Host support bundle for Embedded Cluster nodes.
# `hostCollectors` gather host and k0s diagnostics; `hostAnalyzers` evaluate a
# subset of that data and report pass/warn/fail outcomes.
apiVersion: troubleshoot.sh/v1beta2
kind: SupportBundle
metadata:
  name: default
spec:
  hostCollectors:
    # --- Built-in host facts -------------------------------------------------
    - cpu: {}
    - hostOS: {}
    - memory: {}
    - blockDevices: {}
    - hostServices: {}
    - ipv4Interfaces: {}
    - time: {}
    # Disk usage; both collectors are referenced by name from hostAnalyzers.
    - diskUsage:
        collectorName: root-disk-usage
        path: /
    - diskUsage:
        collectorName: openebs-disk-usage
        path: /var/openebs/local

    # --- k0s and installer state ---------------------------------------------
    # Output of `k0s status` is matched by the "Kubernetes API probing"
    # textAnalyze check below.
    - run:
        collectorName: k0s-status
        command: k0s
        args: ["status"]
    - run:
        collectorName: k0s-issue-template
        command: sh
        args: ["-c", "uname -srvmo; cat /etc/os-release || lsb_release -a"]
    - run:
        collectorName: k0s-sysinfo
        command: k0s
        args: ["sysinfo"]
    - copy:
        collectorName: installer-logs
        path: /var/lib/embedded-cluster/logs/*.log
    # Verbose health output of the local API server (-k skips TLS verification).
    - run:
        collectorName: k8s-api-healthz-6443
        command: "curl"
        args: ["-k", "https://localhost:6443/healthz?verbose"]

    # --- General system state ------------------------------------------------
    - run:
        collectorName: "free"
        command: "free"
        args: ["-m"]
    - run:
        collectorName: "top"
        command: "top"
        args: ["-b", "-n", "1"]
    - run:
        collectorName: "uptime"
        command: "uptime"
        args: []
    - run:
        collectorName: "uname"
        command: "uname"
        args: ["-a"]
    - run:
        collectorName: "df"
        command: "df"
        args: ["-h"]
    - run:
        collectorName: "iostat"
        command: "iostat"
        args: ["-x"]

    # --- Security modules and firewalling ------------------------------------
    - run:
        collectorName: "sestatus"
        command: "sestatus"
        args: []
    - run:
        collectorName: "apparmor-status"
        command: "apparmor_status"
        args: []
    - run:
        collectorName: "iptables"
        command: "iptables"
        args: ["-L", "-v"]
    - run:
        collectorName: "iptables-version"
        command: "iptables"
        args: ["-V"]
    - run:
        collectorName: "nftables-list"
        command: "nft"
        args: ["list", "table", "filter"]
    - run:
        collectorName: "ipvsadm"
        command: "ipvsadm"
        args: ["-l", "-n"]

    # --- Storage, networking and name resolution -----------------------------
    - run:
        collectorName: "lsblk"
        command: "lsblk"
        args: ["--fs"]
    - run:
        collectorName: "netstat-ports"
        command: "netstat"
        args: ["-t", "-u", "-l", "-p", "-n"]
    - run:
        collectorName: "netstat-route-table"
        command: "netstat"
        args: ["-r", "-n"]
    - run:
        collectorName: "resolvectl-status"
        command: "resolvectl"
        args: ["status"]
    - run:
        collectorName: "resolv-conf"
        command: "cat"
        args: ["/etc/resolv.conf"]
    - run:
        collectorName: "systemd-resolved-conf"
        command: "cat"
        args: ["/etc/systemd/resolved.conf"]
    - run:
        collectorName: "nsswitch-conf"
        command: "cat"
        args: ["/etc/nsswitch.conf"]
    - run:
        collectorName: "hosts"
        command: "cat"
        args: ["/etc/hosts"]
    - run:
        collectorName: "ip-route-table"
        command: "ip"
        args: ["route"]
    - run:
        collectorName: "sysctl"
        command: "sysctl"
        args: ["-a"]
    # Gather the hostname from several sources to help troubleshoot scenarios
    # where they do not match.
    - run:
        collectorName: "hostnames"
        command: "sh"
        args:
          - -c
          - |
            echo "hostname = $(hostname)"
            echo "/proc/sys/kernel/hostname = $(cat /proc/sys/kernel/hostname)"
            echo "uname -n = $(uname -n)"

    # --- HTTP reachability probes --------------------------------------------
    # Each collectorName below is consumed by the http analyzer of the same
    # name in hostAnalyzers.
    #
    # Local Kubernetes API health endpoint. Required by the `curl-k8s-api-6443`
    # analyzer, which otherwise has no collected result to evaluate. TLS
    # verification is skipped, mirroring the `curl -k` that analyzer suggests.
    - http:
        collectorName: curl-k8s-api-6443
        get:
          url: https://localhost:6443/healthz
          insecureSkipVerify: true
    - http:
        collectorName: curl-api-replicated-com
        get:
          url: https://api.replicated.com/healthz
    - http:
        collectorName: curl-get-replicated-com
        get:
          url: https://get.replicated.com/healthz
    - http:
        collectorName: curl-registry-replicated-com
        get:
          url: https://registry.replicated.com/healthz
    - http:
        collectorName: curl-proxy-replicated-com
        get:
          url: https://proxy.replicated.com/healthz
    - http:
        collectorName: curl-replicated-app
        get:
          url: https://replicated.app/healthz

    # --- Load, processes and kernel log --------------------------------------
    - run:
        collectorName: "du-root"
        command: "sh"
        args: ["-c", "du -Shax / --exclude /proc | sort -rh | head -20"]
    - run:
        collectorName: "mount"
        command: "mount"
        args: ["-l"]
    - run:
        collectorName: "vmstat"
        command: "vmstat"
        args: ["-w"]
    # Counts processes whose state column starts with R or D. The pattern is
    # single-quoted so the shell cannot glob-expand `[RD]` against files in
    # the working directory.
    - run:
        collectorName: "ps-high-load"
        command: "sh"
        args: ["-c", "ps -eo s,user,cmd | grep '^[RD]' | sort | uniq -c | sort -nbr | head -20"]
    - run:
        collectorName: "ps-detect-antivirus-and-security-tools"
        command: "sh"
        args: ["-c", "ps -ef | grep -E 'clamav|sophos|esets_daemon|fsav|symantec|mfend|ds_agent|kav|bdagent|s1agent|falcon|illumio|xagt' | grep -v grep"]
    - run:
        collectorName: "journalctl-dmesg"
        command: "journalctl"
        args: ["--dmesg", "--no-pager", "-S", "7 days ago"]

  hostAnalyzers:
    - memory:
        checkName: Amount of Memory
        outcomes:
          - warn:
              when: "< 2G"
              message: At least 2G of memory is recommended
          - pass:
              message: The system has at least 2G of memory
    - diskUsage:
        checkName: Root disk usage
        collectorName: root-disk-usage
        outcomes:
          - fail:
              when: "total < 40Gi"
              message: The disk containing directory / has less than 40Gi of total space
          - warn:
              when: "used/total > 80%"
              message: The disk containing directory / is more than 80% full
          - warn:
              when: "available < 10Gi"
              message: The disk containing directory / has less than 10Gi of disk space available
          - pass:
              message: The disk containing directory / has sufficient space
    - diskUsage:
        checkName: OpenEBS disk usage
        collectorName: openebs-disk-usage
        outcomes:
          - fail:
              when: "total < 40Gi"
              message: The disk containing OpenEBS volumes has less than 40Gi of space
          - warn:
              when: "used/total > 80%"
              message: The disk containing OpenEBS volumes is more than 80% full
          - warn:
              when: "available < 10Gi"
              message: The disk containing OpenEBS volumes has less than 10Gi of disk space available
          - pass:
              message: The disk containing OpenEBS volumes has sufficient space
    # Matches the regex against the output of the `k0s-status` run collector.
    - textAnalyze:
        checkName: Kubernetes API probing
        fileName: host-collectors/run-host/k0s-status.txt
        regex: 'Kube-api probing successful: true'
        outcomes:
          - fail:
              when: "false"
              message: Kubernetes API probing is reporting a failure
          - pass:
              when: "true"
              message: Kubernetes API probing is reporting success
    # NOTE(review): outcomes appear to be evaluated in order with the first
    # match winning, so the timezone outcomes may only be reached when none of
    # the ntp conditions above them match - confirm against the time analyzer.
    - time:
        checkName: "ntp-status"
        outcomes:
          - fail:
              when: "ntp == unsynchronized+inactive"
              message: "System clock is not synchronized"
          - warn:
              when: "ntp == unsynchronized+active"
              message: System clock not yet synchronized
          - pass:
              when: "ntp == synchronized+active"
              message: "System clock is synchronized"
          - warn:
              when: "timezone != UTC"
              message: "Non UTC timezone can interfere with system function"
          - pass:
              when: "timezone == UTC"
              message: "Timezone is set to UTC"
    # The http analyzers below each read the result of the http collector with
    # the same collectorName.
    - http:
        checkName: curl-k8s-api-6443
        collectorName: curl-k8s-api-6443
        outcomes:
          - warn:
              when: "error"
              message: Unable to curl https://localhost:6443/healthz. Please, run `curl -k https://localhost:6443/healthz` to check further information.
          - pass:
              when: "statusCode == 200"
              message: curl -k https://localhost:6443/healthz returned HTTP CODE response 200.
          - warn:
              message: "Unexpected response. HTTP CODE response is not 200. Please, run `curl -ki https://localhost:6443/healthz` to check further information."
    - http:
        checkName: curl-api-replicated-com
        collectorName: curl-api-replicated-com
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://api.replicated.com/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://api.replicated.com/healthz
          - warn:
              message: "Unexpected response"
    - http:
        checkName: curl-get-replicated-com
        collectorName: curl-get-replicated-com
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://get.replicated.com/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://get.replicated.com/healthz
          - warn:
              message: "Unexpected response"
    - http:
        checkName: curl-registry-replicated-com
        collectorName: curl-registry-replicated-com
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://registry.replicated.com/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://registry.replicated.com/healthz
          - warn:
              message: "Unexpected response"
    - http:
        checkName: curl-proxy-replicated-com
        collectorName: curl-proxy-replicated-com
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://proxy.replicated.com/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://proxy.replicated.com/healthz
          - warn:
              message: "Unexpected response"
    - http:
        checkName: curl-replicated-app
        collectorName: curl-replicated-app
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://replicated.app/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://replicated.app/healthz
          - warn:
              message: "Unexpected response"
Loading