-
Notifications
You must be signed in to change notification settings - Fork 1
/
collectl.conf
152 lines (125 loc) · 6.86 KB
/
collectl.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Copyright 2003-2009 Hewlett-Packard Development Company, LP
# Like most Linux configuration files, this specifies a set of user-controllable
# parameters. In many cases these are commented out, which simply means those
# are the default values already used by collectl. To change a value,
# uncomment the line and change it. To revert to the default, all you
# need to do is comment the line out again.
############################
# daemon/service handling
############################
# When someone specifies a daemon is to be run, typically but not limited
# to running collectl as a service, this string will cause the associated
# values to be used. They CAN also be overridden via a command line switch.
# In other words, if DaemonCommands is set to '-s cdm' and collectl is
# invoked with -D, it will process subsystems 'cdm'. However, if it is invoked
# with '-D -s mnp' it will process subsystems 'mnp'; there is no combining the
# two sets of values. Be sure to include any switches a daemon is required to
# have such as -f and either -r or -R.
# NOTE - if things aren't behaving as expected, you can always try running
# collectl in non-daemon mode just to see if there are any error messages. If
# you include the -m switch, you can also look in the collectl log, which is
# stored in the logging directory.
# Switches applied when collectl is run as a daemon (-D). A switch that is also
# given on the command line replaces the value set here; the two are not merged.
DaemonCommands = -f /var/log/collectl -r00:00,7 -m -F60 -s+YZ --import lustreClient,s -P --export lexpr,f=/tmp/L
# This defines the location to look for all additional required files if formatit.ph
# is not in the same directory as collectl itself.
#ReqDir = /usr/share/collectl
# E x t r a L i b r a r i e s
# So far this has only been used during development, but if there are extra
# library locations that should be 'used', put them here.
#Libraries =
# S t a n d a r d U t i l i t i e s
# Note that by default collectl will look for lspci in both /sbin and
# /usr/sbin, but if listed here will only look in that one place.
#Grep = /bin/grep
#Egrep = /bin/egrep
#Ps = /bin/ps
#Rpm = /bin/rpm
#Lspci = /sbin/lspci
#Lctl = /usr/sbin/lctl
# I n f i n i b a n d S u p p o r t
# Collectl will assume open fabric and will attempt to use the perfquery
# utility to get the counters. If not there, it assumes Voltaire and will
# first look in /proc/voltaire/adaptor-mlx/stats and failing that will use
# the get_pcounter utility. Since collectl resets IB counters in the
# hardware, you can disable its collection by commenting out the appropriate
# variable below: PQuery for OFED, PCounter for get_pcounter calls and
# VStat for ALL non-ofed access of any kind.
# You can disable either by commenting out the reference to VStat/PQuery below.
# A value may list several candidate locations separated by ':'.
# NOTE(review): presumably the candidates are tried in the order listed - confirm.
PQuery = /usr/sbin/perfquery:/usr/bin/perfquery:/usr/local/ofed/bin/perfquery
PCounter = /usr/mellanox/bin/get_pcounter
VStat = /usr/mellanox/bin/vstat:/usr/bin/vstat
# NOTE(review): OfedInfo is not described in this file's commentary; presumably
# it locates the ofed_info utility - confirm.
OfedInfo = /usr/bin/ofed_info:/usr/local/ofed/bin/ofed_info
# D e f a u l t s
# This set of variables are actually all set in collectl and you need not
# change them.
# This parameter controls subsystem selection. The 'core' subsystems are
# selected when the user omits the -s switch OR uses the '+' or '-' to
# add/remove from that list. Note that changing this will also change the
# default for -s displayed in help.
#SubsysCore = bcdfijlmnstx
# although these can all be overridden by switches, they're assumed to
# always be defined so don't remove or comment any of them out! Over time
# more may be added
#Interval = 10
#Interval2 = 60
#Interval3 = 120
# These are SFS lustre specific. When using the -OD switch, any partitions
# found to be smaller than LustreSvcLunMax, which is in GB, will be ignored.
# When displaying data in verbose mode, only LustreMaxBlkSize will be
# displayed, but ALL block sizes will be read and recorded
#LustreSvcLunMax = 10
#LustreMaxBlkSize = 512
# By default, we check at these frequencies to see if lustre or interconnect
# configurations have changed. Things are efficient enough that now we can
# check for lustre changes every polling interval but I'm leaving the code
# in place rather than remove it in case needed again in the future.
#LustreConfigInt = 1
#InterConnectInt = 900
# These apply to disk/partition limits for exception (-o x/X) processing
#LimSVC = 30 # Minimum partition Avg Service time
#LimIOS = 10 # Minimum number of Disk OR Partition I/Os
#LimBool = 0 # generate exception record if EITHER limit exceeded
#LimLusKBS = 100 # Minimum number of Lustre OSS KB/sec
#LimLusReints = 1000 # Minimum number of Lustre MDS Reint operations
# Socket I/O Defaults
#Port = 2655
#Timeout = 10
# Maximum allowable zlib errors in a single day or run.
#MaxZlibErrors = 20
# To disable bogus network data checking, set this to any negative value
#DefNetSpeed=10000
# Collectl will automatically size the frequency of headers in 'brief format'
# to the height of your display window, which it determines using the resize
# utility. If that utility can't be found, it will use the height specified
# in 'TermHeight'. If 'resize' is in your path but you want a fixed/different
# size, comment out the Resize line and uncomment TermHeight, setting it to
# what you want.
#TermHeight = 24
# Candidate locations of the resize utility, separated by ':'.
Resize=/usr/bin/resize:/usr/X11R6/bin/resize
# To turn off Time::HiRes/glibc incompatibility checking, the following
# should be enabled and set to 0
#TimeHiResCheck = 1
# These control environmental monitoring and to use it you MUST have ipmitool
# installed (see http://ipmitool.sourceforge.net/). If not in the path shown
# below, you must change it.
# Candidate locations of ipmitool, separated by ':'.
Ipmitool = /usr/bin/ipmitool:/usr/local/bin/ipmitool:/opt/hptc/sbin/ipmitool
# NOTE(review): presumably the file in which ipmi data is cached - confirm.
IpmiCache = /var/run/collectl-ipmicache
# NOTE(review): presumably the comma-separated sensor types to report - confirm.
IpmiTypes = fan,temp,current
# passwd file for UID to usernames mapping during process monitoring
#Passwd = /etc/passwd
# If a cciss device is reset (such as during a LUN scan) while collectl is running,
# disk rates will be excessive. If one is seen above the following value, reset ALL stats for
# that disk to 0. To disable, set this to -1
#DiskMaxValue=5000000
# When collectl reads disk data, it filters out any that don't match the DiskFilter,
# which by default looks for cciss, hd, sd, dm, xvd, fio, vd, emcpower, psv and nvme. All
# others are ignored. To change the filter, set the string below to those you want to keep BUT
# you need to know what a perl regular expression looks like or you may not get the
# desired results. CAUTION - white space is CRITICAL for this to work.
#DiskFilter = /cciss/c\d+d\d+ |hd[ab] | sd[a-z]+ |dm-\d+ |xvd[a-z] |fio[a-z]+ | vd[a-z]+ |emcpower[a-z]+ |psv\d+ |nvme\d+n\d+ /
# Kernel Efficiency Test
# On kernels 2.6.32 forward (and you can't tell how distros patched) there is a read inefficiency
# in the /proc filesystem for 4 and more sockets and the only way to tell is to test it. If slow,
# generate a warning that patching the kernel may be recommended. To bypass the test/message, set
# the following to 'no'
#ProcReadTest = yes