diff --git a/examples/plugin_log_task_usage/StressTest.wdl b/examples/plugin_log_task_usage/StressTest.wdl index 93adcc88..01e49f56 100644 --- a/examples/plugin_log_task_usage/StressTest.wdl +++ b/examples/plugin_log_task_usage/StressTest.wdl @@ -1,5 +1,6 @@ version 1.1 # MINIWDL__LOG_TASK_USAGE__PERIOD=2 miniwdl run examples/plugin_log_task_usage/StressTest.wdl --dir /tmp --verbose +# MINIWDL__LOG_TASK_USAGE__PERIOD=2 miniwdl-aws-submit plugin_log_task_usage/StressTest.wdl --verbose --follow task StressTest { input { diff --git a/examples/plugin_log_task_usage/miniwdl_log_task_usage.py b/examples/plugin_log_task_usage/miniwdl_log_task_usage.py index 1e9bda97..b3babe9f 100644 --- a/examples/plugin_log_task_usage/miniwdl_log_task_usage.py +++ b/examples/plugin_log_task_usage/miniwdl_log_task_usage.py @@ -7,7 +7,7 @@ set configuration [log_task_usage] period (or the environment variable MINIWDL__LOG_TASK_USAGE__PERIOD) to the desired logging period in seconds. -YMMV because host OS version & configuration may affect availability of the cgroup v2 counters read +YMMV because host OS version & configuration may affect availability of the cgroup counters read from pseudo-files under /sys/fs/cgroup """ @@ -31,67 +31,58 @@ def main(cfg, logger, run_id, run_dir, task, **recv): _logger_sh = r""" _miniwdl_log_task_usage() { set +ex - PERIOD_SECS=${1:-10} # logging period (default 10s) - if [ ! -f /sys/fs/cgroup/cpu.stat ] || [ ! -f /sys/fs/cgroup/io.stat ]; then - >&2 echo "miniwdl_log_task_usage unable to report: cgroup v2 counters /sys/fs/cgroup/{cpu,io}.stat not found" + local PERIOD_SECS=${1:-10} # logging period (default 10s) + + # detect whether host provides cgroup v2 or v1, and helper functions to read CPU & memory usage + # counters from the appropriate pseudo-files + local cgroup_version="" + if [ -f /sys/fs/cgroup/cpu.stat ]; then + cgroup_version=2 + elif [ -f /sys/fs/cgroup/cpuacct/cpuacct.stat ]; then + cgroup_version=1 + else + >&2 echo "miniwdl_log_task_usage unable to report: cgroup CPU usage counters not found" exit 1 fi - # CPU usage in microseconds - cpu_usecs() { - awk '/^usage_usec/ {print $2}' /sys/fs/cgroup/cpu.stat + cpu_secs() { + local ans + if [ $cgroup_version -eq 2 ]; then + ans=$(awk '/^usage_usec/ {print $2}' /sys/fs/cgroup/cpu.stat) + echo $(( ans / 1000000 )) + else + ans=$(cut -f2 -d ' ' /sys/fs/cgroup/cpuacct/cpuacct.stat | head -n 1) + echo $(( ans / 100 )) # 100 "jiffies" per second + fi } - # total block device I/O - io_bytes() { - awk '{ - for (i = 1; i <= NF; i++) { - if ($i ~ /^rbytes=/) { - rbytes += substr($i, index($i, "=") + 1); - } - if ($i ~ /^wbytes=/) { - wbytes += substr($i, index($i, "=") + 1); - } - } - } - END { - print rbytes, wbytes; - }' /sys/fs/cgroup/io.stat + mem_bytes() { + if [ $cgroup_version -eq 2 ]; then + awk '$1 == "anon" { print $2 }' /sys/fs/cgroup/memory.stat + else + awk -F ' ' '$1 == "total_rss" { print $2 }' /sys/fs/cgroup/memory/memory.stat + fi } - T_0=$(date +%s) - cpu_usecs_0=$(cpu_usecs) - read rbytes0 wbytes0 < <(io_bytes) - cpu_usecs_last=$cpu_usecs_0 - rbytes_last=$rbytes0 - wbytes_last=$wbytes0 - t_last=$T_0 - mem_max_bytes=0 + local T_0=$(date +%s) + local t_last=$T_0 + local cpu_secs_0=$(cpu_secs) + local cpu_secs_last=$cpu_secs_0 while true; do sleep "$PERIOD_SECS" - t=$(date +%s) - wall_secs=$(( t - T_0 )) + local t=$(date +%s) + local wall_secs=$(( t - T_0 )) - cpu_usecs_current=$(cpu_usecs) - cpu_total_usecs=$(( cpu_usecs_current - cpu_usecs_0 )) - cpu_period_usecs=$(( cpu_usecs_current - cpu_usecs_last )) + local cpu_secs_current=$(cpu_secs) + local cpu_total_secs=$(( cpu_secs_current - cpu_secs_0 )) + local cpu_period_secs=$(( cpu_secs_current - cpu_secs_last )) - read rbytes_current wbytes_current < <(io_bytes) - rbytes_total=$(( rbytes_current - rbytes0 )) - wbytes_total=$(( wbytes_current - wbytes0 )) - rbytes_period=$(( rbytes_current - rbytes_last )) - wbytes_period=$(( wbytes_current - wbytes_last )) + local mem_bytes_current=$(mem_bytes) - mem_bytes=$(awk '$1 == "anon" { print $2 }' /sys/fs/cgroup/memory.stat) - mem_max_bytes=$(( mem_bytes > mem_max_bytes ? mem_bytes : mem_max_bytes )) + >&2 echo "container usage :: cpu_pct: $(( 100 * cpu_period_secs / PERIOD_SECS )), mem_MiB: $(( mem_bytes_current/1048576 )), cpu_total_s: ${cpu_total_secs}, elapsed_s: ${wall_secs}" - >&2 echo "container usage (last ${PERIOD_SECS}s) :: cpu_pct: $(( 100 * cpu_period_usecs / 1000000 / PERIOD_SECS )), mem_GiB: $(( mem_bytes/1073741824 )), io_read_MiB: $(( rbytes_period/1048576 )), io_write_MiB: $(( wbytes_period/1048576 ))" - >&2 echo "container usage (total ${wall_secs}s) :: cpu_s: $(( cpu_total_usecs / 1000000 )), mem_max_GiB: $(( mem_max_bytes/1073741824 )), io_read_GiB: $(( rbytes_total/1073741824 )), io_write_GiB: $(( wbytes_total/1073741824 ))" - - cpu_usecs_last=$cpu_usecs_current - rbytes_last=$rbytes_current - wbytes_last=$wbytes_current + cpu_secs_last=$cpu_secs_current t_last=$t done }