Skip to content

Commit

Permalink
Add support for users monitoring
Browse files Browse the repository at this point in the history
Default to low thresholds because we don't expect users to be signed in
on servers, but the thresholds can be tuned the usual way with some flags.

For now, this check is not enabled by default.
  • Loading branch information
smortex committed Aug 16, 2022
1 parent b5270be commit 1fb092f
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions lib/riemann/tools/health.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ class Health
opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
opt :uptime_warning, 'Uptime warning threshold', default: 86_400
opt :uptime_critical, 'Uptime critical threshold', default: 3600
opt :users_warning, 'Users warning threshold', default: 1
opt :users_critical, 'Users critical threshold', default: 1
opt :swap_warning, 'Swap warning threshold', default: 0.4
opt :swap_critical, 'Swap critical threshold', default: 0.5
opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu load memory disk swap]
Expand All @@ -34,6 +36,7 @@ def initialize
load: { critical: opts[:load_critical], warning: opts[:load_warning] },
memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
uptime: { critical: opts[:uptime_critical], warning: opts[:uptime_warning] },
users: { critical: opts[:users_critical], warning: opts[:users_warning] },
swap: { critical: opts[:swap_critical], warning: opts[:swap_warning] },
}
case (@ostype = `uname -s`.chomp.downcase)
Expand Down Expand Up @@ -80,6 +83,7 @@ def initialize
@swap = method :linux_swap
@supports_exclude_type = `df --help 2>&1 | grep -e "--exclude-type"` != ''
end
@users = method :users

opts[:checks].each do |check|
case check
Expand All @@ -93,6 +97,8 @@ def initialize
@memory_enabled = true
when 'uptime'
@uptime_enabled = true
when 'users'
@users_enabled = true
when 'swap'
@swap_enabled = true
end
Expand Down Expand Up @@ -120,6 +126,18 @@ def report_pct(service, fraction, report)
end
end

# Reports an integer metric for +service+, picking the alert state by
# comparing +value+ against the limits stored in @limits[service].
#
# service - key into @limits (e.g. :users)
# value   - the measured value; nothing is reported when it is nil or false
# report  - text appended after the value in the alert description
def report_int(service, value, report)
  return unless value

  # Levels are checked from most to least severe; the first limit that the
  # value reaches (>=) wins, and :ok is used when none is reached.
  state = %i[critical warning].find { |level| value >= @limits[service][level] } || :ok

  alert service, state, value, "#{value} #{report}"
end

def report_uptime(uptime)
description = uptime_to_human(uptime)

Expand Down Expand Up @@ -391,6 +409,12 @@ def linux_uptime
report_uptime(value)
end

# Reports the :users entry of the uptime data through report_int,
# labelling the value "user" when it is exactly 1 and "users" otherwise.
# NOTE(review): presumably uptime[:users] is the number of logged-in
# users -- confirm against the uptime helper.
def users
  count = uptime[:users]
  label = count == 1 ? 'user' : 'users'

  report_int(:users, count, label)
end

def bsd_swap
_device, blocks, used, _avail, _capacity = `swapinfo`.lines.last.split(/\s+/)

Expand Down Expand Up @@ -442,6 +466,7 @@ def tick
@disk.call if @disk_enabled
@load.call if @load_enabled
@uptime.call if @uptime_enabled
@users.call if @users_enabled
@swap.call if @swap_enabled
end

Expand Down

0 comments on commit 1fb092f

Please sign in to comment.