Skip to content

Commit

Permalink
support both cgroup v1 and v2
Browse files Browse the repository at this point in the history
strike disk I/O because it does not cover network filesystems
  • Loading branch information
mlin committed Feb 4, 2024
1 parent 80bb042 commit 57319eb
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 48 deletions.
1 change: 1 addition & 0 deletions examples/plugin_log_task_usage/StressTest.wdl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
version 1.1
# MINIWDL__LOG_TASK_USAGE__PERIOD=2 miniwdl run examples/plugin_log_task_usage/StressTest.wdl --dir /tmp --verbose
# MINIWDL__LOG_TASK_USAGE__PERIOD=2 miniwdl-aws-submit plugin_log_task_usage/StressTest.wdl --verbose --follow
task StressTest {
input {
Expand Down
87 changes: 39 additions & 48 deletions examples/plugin_log_task_usage/miniwdl_log_task_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
set configuration [log_task_usage] period (or the environment variable
MINIWDL__LOG_TASK_USAGE__PERIOD) to the desired logging period in seconds.
YMMV because host OS version & configuration may affect availability of the cgroup v2 counters read
YMMV because host OS version & configuration may affect availability of the cgroup counters read
from pseudo-files under /sys/fs/cgroup
"""

Expand All @@ -31,67 +31,58 @@ def main(cfg, logger, run_id, run_dir, task, **recv):
_logger_sh = r"""
_miniwdl_log_task_usage() {
set +ex
PERIOD_SECS=${1:-10} # logging period (default 10s)
if [ ! -f /sys/fs/cgroup/cpu.stat ] || [ ! -f /sys/fs/cgroup/io.stat ]; then
>&2 echo "miniwdl_log_task_usage unable to report: cgroup v2 counters /sys/fs/cgroup/{cpu,io}.stat not found"
local PERIOD_SECS=${1:-10} # logging period (default 10s)
# detect whether host provides cgroup v2 or v1, and helper functions to read CPU & memory usage
# counters from the appropriate pseudo-files
local cgroup_version=""
if [ -f /sys/fs/cgroup/cpu.stat ]; then
cgroup_version=2
elif [ -f /sys/fs/cgroup/cpuacct/cpuacct.stat ]; then
cgroup_version=1
else
>&2 echo "miniwdl_log_task_usage unable to report: cgroup CPU usage counters not found"
exit 1
fi
# CPU usage in microseconds
cpu_usecs() {
awk '/^usage_usec/ {print $2}' /sys/fs/cgroup/cpu.stat
cpu_secs() {
local ans
if [ $cgroup_version -eq 2 ]; then
ans=$(awk '/^usage_usec/ {print $2}' /sys/fs/cgroup/cpu.stat)
echo $(( ans / 1000000 ))
else
ans=$(cut -f2 -d ' ' /sys/fs/cgroup/cpuacct/cpuacct.stat | head -n 1)
echo $(( ans / 100 )) # 100 "jiffies" per second
fi
}
# total block device I/O
io_bytes() {
awk '{
for (i = 1; i <= NF; i++) {
if ($i ~ /^rbytes=/) {
rbytes += substr($i, index($i, "=") + 1);
}
if ($i ~ /^wbytes=/) {
wbytes += substr($i, index($i, "=") + 1);
}
}
}
END {
print rbytes, wbytes;
}' /sys/fs/cgroup/io.stat
mem_bytes() {
if [ $cgroup_version -eq 2 ]; then
awk '$1 == "anon" { print $2 }' /sys/fs/cgroup/memory.stat
else
awk -F ' ' '$1 == "total_rss" { print $2 }' /sys/fs/cgroup/memory/memory.stat
fi
}
T_0=$(date +%s)
cpu_usecs_0=$(cpu_usecs)
read rbytes0 wbytes0 < <(io_bytes)
cpu_usecs_last=$cpu_usecs_0
rbytes_last=$rbytes0
wbytes_last=$wbytes0
t_last=$T_0
mem_max_bytes=0
local T_0=$(date +%s)
local t_last=$T_0
local cpu_secs_0=$(cpu_secs)
local cpu_secs_last=$cpu_secs_0
while true; do
sleep "$PERIOD_SECS"
t=$(date +%s)
wall_secs=$(( t - T_0 ))
local t=$(date +%s)
local wall_secs=$(( t - T_0 ))
cpu_usecs_current=$(cpu_usecs)
cpu_total_usecs=$(( cpu_usecs_current - cpu_usecs_0 ))
cpu_period_usecs=$(( cpu_usecs_current - cpu_usecs_last ))
local cpu_secs_current=$(cpu_secs)
local cpu_total_secs=$(( cpu_secs_current - cpu_secs_0 ))
local cpu_period_secs=$(( cpu_secs_current - cpu_secs_last ))
read rbytes_current wbytes_current < <(io_bytes)
rbytes_total=$(( rbytes_current - rbytes0 ))
wbytes_total=$(( wbytes_current - wbytes0 ))
rbytes_period=$(( rbytes_current - rbytes_last ))
wbytes_period=$(( wbytes_current - wbytes_last ))
local mem_bytes_current=$(mem_bytes)
mem_bytes=$(awk '$1 == "anon" { print $2 }' /sys/fs/cgroup/memory.stat)
mem_max_bytes=$(( mem_bytes > mem_max_bytes ? mem_bytes : mem_max_bytes ))
>&2 echo "container usage :: cpu_pct: $(( 100 * cpu_period_secs / PERIOD_SECS )), mem_MiB: $(( mem_bytes_current/1048576 )), cpu_total_s: ${cpu_total_secs}, elapsed_s: ${wall_secs}"
>&2 echo "container usage (last ${PERIOD_SECS}s) :: cpu_pct: $(( 100 * cpu_period_usecs / 1000000 / PERIOD_SECS )), mem_GiB: $(( mem_bytes/1073741824 )), io_read_MiB: $(( rbytes_period/1048576 )), io_write_MiB: $(( wbytes_period/1048576 ))"
>&2 echo "container usage (total ${wall_secs}s) :: cpu_s: $(( cpu_total_usecs / 1000000 )), mem_max_GiB: $(( mem_max_bytes/1073741824 )), io_read_GiB: $(( rbytes_total/1073741824 )), io_write_GiB: $(( wbytes_total/1073741824 ))"
cpu_usecs_last=$cpu_usecs_current
rbytes_last=$rbytes_current
wbytes_last=$wbytes_current
cpu_secs_last=$cpu_secs_current
t_last=$t
done
}
Expand Down

0 comments on commit 57319eb

Please sign in to comment.