From d048b0542cab49ceac3f92c63671d4133b092090 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2023 07:26:21 -0800 Subject: [PATCH] toplev: Move most remaining top level code into functions --- toplev.py | 1008 +++++++++++++++++++++++++++++------------------------ 1 file changed, 543 insertions(+), 465 deletions(-) diff --git a/toplev.py b/toplev.py index a93246e2..ef93956f 100755 --- a/toplev.py +++ b/toplev.py @@ -475,8 +475,9 @@ def del_arg_val(arg, flag): i = findprefix(arg, flag, "--") del arg[i:i+2 if arg[i] == flag else i+1] -p = argparse.ArgumentParser(usage='toplev [options] perf-arguments', -description=''' +def handle_args(): + p = argparse.ArgumentParser(usage='toplev [options] perf-arguments', + description=''' Estimate on which part of the CPU pipeline a workload bottlenecks using the TopDown model. The bottlenecks are expressed as a tree with different levels. Requires a modern Intel CPU. @@ -525,200 +526,202 @@ def del_arg_val(arg, flag): toplev needs a new enough perf tool and has specific requirements on the kernel. See http://github.com/andikleen/pmu-tools/wiki/toplev-kernel-support.''', -formatter_class=argparse.RawDescriptionHelpFormatter) -g = p.add_argument_group('General operation') -g.add_argument('--interval', '-I', help='Measure every ms instead of only once', - type=int) -g.add_argument('--no-multiplex', - help='Do not multiplex, but run the workload multiple times as needed. ' - 'Requires reproducible workloads.', - action='store_true') -g.add_argument('--single-thread', '-S', help='Measure workload as single thread. Workload must run single threaded. ' - 'In SMT mode other thread must be idle.', action='store_true') -g.add_argument('--fast', '-F', help='Skip sanity checks to optimize CPU consumption', action='store_true') -g.add_argument('--import', help='Import specified perf stat output file instead of running perf. ' - 'Must be for same cpu, same arguments, same /proc/cpuinfo, same topology, unless overriden', - dest='import_') -g.add_argument('--subset', help="Process only a subset of the input file with --import. " - "Valid syntax: a-b. Process from seek offset a to b. b is optional. " - "x/n%% process x'th n percent slice. Starts counting at 0. Add - to process to end of input. " - "sample:n%% Sample each time stamp in input with n%% (0-100%%) probability. " - "toplev will automatically round to the next time stamp boundary.") -g.add_argument('--parallel', - help="Run toplev --import in parallel in N processes, or the system's number of CPUs if 0 is specified", - action='store_true') -g.add_argument('--pjobs', type=int, default=0, - help='Number of threads to run with parallel. Default is number of CPUs.') -g.add_argument('--gen-script', help='Generate script to collect perfmon information for --import later', - action='store_true') -g.add_argument('--script-record', help='Use perf stat record in script for faster recording or ' - 'import generated perf.data (requires new perf)', action='store_true') -g.add_argument('--drilldown', help='Automatically rerun to get more details on bottleneck', action='store_true') -g.add_argument('--show-cpu', help='Print current CPU type and exit', + formatter_class=argparse.RawDescriptionHelpFormatter) + g = p.add_argument_group('General operation') + g.add_argument('--interval', '-I', help='Measure every ms instead of only once', + type=int) + g.add_argument('--no-multiplex', + help='Do not multiplex, but run the workload multiple times as needed. ' + 'Requires reproducible workloads.', + action='store_true') + g.add_argument('--single-thread', '-S', help='Measure workload as single thread. Workload must run single threaded. ' + 'In SMT mode other thread must be idle.', action='store_true') + g.add_argument('--fast', '-F', help='Skip sanity checks to optimize CPU consumption', action='store_true') + g.add_argument('--import', help='Import specified perf stat output file instead of running perf. ' + 'Must be for same cpu, same arguments, same /proc/cpuinfo, same topology, unless overriden', + dest='import_') + g.add_argument('--subset', help="Process only a subset of the input file with --import. " + "Valid syntax: a-b. Process from seek offset a to b. b is optional. " + "x/n%% process x'th n percent slice. Starts counting at 0. Add - to process to end of input. " + "sample:n%% Sample each time stamp in input with n%% (0-100%%) probability. " + "toplev will automatically round to the next time stamp boundary.") + g.add_argument('--parallel', + help="Run toplev --import in parallel in N processes, or the system's number of CPUs if 0 is specified", action='store_true') + g.add_argument('--pjobs', type=int, default=0, + help='Number of threads to run with parallel. Default is number of CPUs.') + g.add_argument('--gen-script', help='Generate script to collect perfmon information for --import later', + action='store_true') + g.add_argument('--script-record', help='Use perf stat record in script for faster recording or ' + 'import generated perf.data (requires new perf)', action='store_true') + g.add_argument('--drilldown', help='Automatically rerun to get more details on bottleneck', action='store_true') + g.add_argument('--show-cpu', help='Print current CPU type and exit', + action='store_true') -g = p.add_argument_group('Measurement filtering') -g.add_argument('--kernel', help='Only measure kernel code', action='store_true') -g.add_argument('--user', help='Only measure user code', action='store_true') -g.add_argument('--cpu', '-C', help=argparse.SUPPRESS) -g.add_argument('--pid', '-p', help=argparse.SUPPRESS) -g.add_argument('--core', help='Limit output to cores. Comma list of Sx-Cx-Tx. All parts optional.') -g.add_argument('--no-aggr', '-A', help='Measure every CPU', action='store_true') -g.add_argument('--cputype', help='Limit to hybrid cpu type (atom or core)', choices=['atom', 'core']) - -g = p.add_argument_group('Select events') -g.add_argument('--level', '-l', help='Measure upto level N (max 6)', - type=int, default=-1) -g.add_argument('--metrics', '-m', help="Print extra metrics", action='store_true') -g.add_argument('--sw', help="Measure perf Linux metrics", action='store_true') -g.add_argument('--no-util', help="Do not measure CPU utilization", action='store_true') -g.add_argument('--tsx', help="Measure TSX metrics", action='store_true') -g.add_argument('--all', help="Measure everything available", action='store_true') -g.add_argument('--frequency', help="Measure frequency", action='store_true') -g.add_argument('--power', help='Display power metrics', action='store_true') -g.add_argument('--nodes', help='Include or exclude nodes (with + to add, -|^ to remove, ' - 'comma separated list, wildcards allowed, add * to include all children/siblings, ' - 'add /level to specify highest level node to match, ' - 'add ^ to match related siblings and metrics, ' - 'start with ! to only include specified nodes)') -g.add_argument('--metric-group', help='Add (+) or remove (-|^) metric groups of metrics, ' - 'comma separated list from --list-metric-groups.', default=None) -g.add_argument('--areas', help='Add specific areas. Comma separate list, wildcards allowed') -g.add_argument('--pinned', help='Run topdown metrics (on ICL+) pinned', action='store_true') -g.add_argument('--exclusive', help='Use exclusive groups. Requires new kernel and new perf', action='store_true') -g.add_argument('--thread', - help="Enable per thread SMT measurements for pre-ICL, at the cost of more multiplexing.", - action='store_true') -g.add_argument('--aux', help='Enable auxilliary hierarchy nodes on some models. ' - 'Auxiliary nodes offer alternate views of the same bottleneck component, which can impact observed bottleneck percentage totals', + g = p.add_argument_group('Measurement filtering') + g.add_argument('--kernel', help='Only measure kernel code', action='store_true') + g.add_argument('--user', help='Only measure user code', action='store_true') + g.add_argument('--cpu', '-C', help=argparse.SUPPRESS) + g.add_argument('--pid', '-p', help=argparse.SUPPRESS) + g.add_argument('--core', help='Limit output to cores. Comma list of Sx-Cx-Tx. All parts optional.') + g.add_argument('--no-aggr', '-A', help='Measure every CPU', action='store_true') + g.add_argument('--cputype', help='Limit to hybrid cpu type (atom or core)', choices=['atom', 'core']) + + g = p.add_argument_group('Select events') + g.add_argument('--level', '-l', help='Measure upto level N (max 6)', + type=int, default=-1) + g.add_argument('--metrics', '-m', help="Print extra metrics", action='store_true') + g.add_argument('--sw', help="Measure perf Linux metrics", action='store_true') + g.add_argument('--no-util', help="Do not measure CPU utilization", action='store_true') + g.add_argument('--tsx', help="Measure TSX metrics", action='store_true') + g.add_argument('--all', help="Measure everything available", action='store_true') + g.add_argument('--frequency', help="Measure frequency", action='store_true') + g.add_argument('--power', help='Display power metrics', action='store_true') + g.add_argument('--nodes', help='Include or exclude nodes (with + to add, -|^ to remove, ' + 'comma separated list, wildcards allowed, add * to include all children/siblings, ' + 'add /level to specify highest level node to match, ' + 'add ^ to match related siblings and metrics, ' + 'start with ! to only include specified nodes)') + g.add_argument('--metric-group', help='Add (+) or remove (-|^) metric groups of metrics, ' + 'comma separated list from --list-metric-groups.', default=None) + g.add_argument('--areas', help='Add specific areas. Comma separate list, wildcards allowed') + g.add_argument('--pinned', help='Run topdown metrics (on ICL+) pinned', action='store_true') + g.add_argument('--exclusive', help='Use exclusive groups. Requires new kernel and new perf', action='store_true') + g.add_argument('--thread', + help="Enable per thread SMT measurements for pre-ICL, at the cost of more multiplexing.", action='store_true') -g.add_argument('--node-metrics', '-N', help='Add metrics related to selected nodes, but hide when node is not crossing threshold', + g.add_argument('--aux', help='Enable auxilliary hierarchy nodes on some models. ' + 'Auxiliary nodes offer alternate views of the same bottleneck component, which can impact observed bottleneck percentage totals', action='store_true') -g.add_argument('--bottlenecks', '-B', help='Show bottlenecks view of Info.Bottleneck metrics', action='store_true') + g.add_argument('--node-metrics', '-N', help='Add metrics related to selected nodes, but hide when node is not crossing threshold', + action='store_true') + g.add_argument('--bottlenecks', '-B', help='Show bottlenecks view of Info.Bottleneck metrics', action='store_true') -g = p.add_argument_group('Model tunables') -g.add_argument('--fp16', help='Enable FP16 support in some models', action='store_true') -g.add_argument('--hbm-only', help='Enable HBM only mode in some models', action='store_true') + g = p.add_argument_group('Model tunables') + g.add_argument('--fp16', help='Enable FP16 support in some models', action='store_true') + g.add_argument('--hbm-only', help='Enable HBM only mode in some models', action='store_true') -g = p.add_argument_group('Query nodes') -g.add_argument('--list-metrics', help='List all metrics. Can be followed by prefixes to limit, ^ for full match', - action='store_true') -g.add_argument('--list-nodes', help='List all nodes. Can be followed by prefixes to limit, ^ for full match', - action='store_true') -g.add_argument('--list-metric-groups', help='List metric groups.', action='store_true') -g.add_argument('--list-all', help='List every supported node/metric/metricgroup. Can be followed by prefixes to limit, ^ for full match.', - action='store_true') -g.add_argument('--describe', help='Print full descriptions for listed node prefixes. Add ^ to require full match.', action='store_true') - -g = p.add_argument_group('Workarounds') -g.add_argument('--no-group', help='Dont use groups', action='store_true') -g.add_argument('--force-events', help='Assume kernel supports all events. May give wrong results.', - action='store_true') -g.add_argument('--ignore-errata', help='Do not disable events with errata', action='store_true', default=True) -g.add_argument('--handle-errata', help='Disable events with errata', action='store_true') -g.add_argument('--reserved-counters', default=0, help='Assume N generic counters are used elsewhere', type=int) - -g = p.add_argument_group('Filtering output') -g.add_argument('--only-bottleneck', help='Only print topdown bottleneck and associated metrics (unless overriden with --nodes)', action='store_true') -g.add_argument('--verbose', '-v', help='Print all results even when below threshold or exceeding boundaries. ' - 'Note this can result in bogus values, as the TopDown methodology relies on thresholds ' - 'to correctly characterize workloads. Values not crossing threshold are marked with <.', - action='store_true') - -g = p.add_argument_group('Output format') -g.add_argument('--per-core', help='Aggregate output per core', action='store_true') -g.add_argument('--per-socket', help='Aggregate output per socket', action='store_true') -g.add_argument('--per-thread', help='Aggregate output per CPU thread', action='store_true') -g.add_argument('--global', help='Aggregate output for all CPUs', action='store_true', dest='global_') -g.add_argument('--no-desc', help='Do not print event descriptions', action='store_true') -g.add_argument('--desc', help='Force event descriptions', action='store_true') -g.add_argument('--csv', '-x', help='Enable CSV mode with specified delimeter') -g.add_argument('--output', '-o', help='Set output file') -g.add_argument('--split-output', help='Generate multiple output files, one for each specified ' - 'aggregation option (with -o)', - action='store_true') -g.add_argument('--graph', help='Automatically graph interval output with tl-barplot.py', - action='store_true') -g.add_argument("--graph-cpu", help="CPU to graph using --graph") -g.add_argument('--title', help='Set title of graph') -g.add_argument('-q', '--quiet', help='Avoid unnecessary status output', action='store_true') -g.add_argument('--long-desc', help='Print long descriptions instead of abbreviated ones.', + g = p.add_argument_group('Query nodes') + g.add_argument('--list-metrics', help='List all metrics. Can be followed by prefixes to limit, ^ for full match', + action='store_true') + g.add_argument('--list-nodes', help='List all nodes. Can be followed by prefixes to limit, ^ for full match', + action='store_true') + g.add_argument('--list-metric-groups', help='List metric groups.', action='store_true') + g.add_argument('--list-all', help='List every supported node/metric/metricgroup. Can be followed by prefixes to limit, ^ for full match.', action='store_true') -g.add_argument('--columns', help='Print CPU output in multiple columns for each node', action='store_true') -g.add_argument('--json', help='Print output in JSON format for Chrome about://tracing', action='store_true') -g.add_argument('--summary', help='Print summary at the end. Only useful with -I', action='store_true') -g.add_argument('--no-area', help='Hide area column', action='store_true') -g.add_argument('--perf-output', help='Save perf stat output in specified file') -g.add_argument('--perf-summary', help='Save summarized perf stat output in specified file') -g.add_argument('--no-perf', help=argparse.SUPPRESS, action='store_true') # noop, for compatibility -g.add_argument('--perf', help='Print perf command line', action='store_true') -g.add_argument('--print', help="Only print perf command line. Don't run", action='store_true') -g.add_argument('--idle-threshold', help="Hide idle CPUs (default <5%% of busiest if not CSV, specify percent)", - default=None, type=float) -g.add_argument('--no-output', help="Don't print computed output. Does not affect --summary.", action='store_true') -g.add_argument('--no-mux', help="Don't print mux statistics", action="store_true") -g.add_argument('--abbrev', help="Abbreviate node names in output", action="store_true") -g.add_argument('--no-sort', help="Don't sort output by Metric group", action="store_true") - -g = p.add_argument_group('Environment') -g.add_argument('--force-cpu', help='Force CPU type', choices=[x[0] for x in known_cpus]) -g.add_argument('--force-topology', metavar='findsysoutput', help='Use specified topology file (find /sys/devices)') -g.add_argument('--force-cpuinfo', metavar='cpuinfo', help='Use specified cpuinfo file (/proc/cpuinfo)') -g.add_argument('--force-hypervisor', help='Assume running under hypervisor (no uncore, no offcore, no PEBS)', - action='store_true') -g.add_argument('--no-uncore', help='Disable uncore events', action='store_true') -g.add_argument('--no-check', help='Do not check that PMU units exist', action='store_true') - -g = p.add_argument_group('Additional information') -g.add_argument('--print-group', '-g', help='Print event group assignments', - action='store_true') -g.add_argument('--raw', help="Print raw values", action='store_true') -g.add_argument('--valcsv', '-V', help='Write raw counter values into CSV file') -g.add_argument('--stats', help='Show statistics on what events counted', action='store_true') - -g = p.add_argument_group('xlsx output') -g.add_argument('--xlsx', help='Generate xlsx spreadsheet output with data for ' - 'socket/global/thread/core/summary/raw views with 1s interval. ' - 'Add --single-thread to only get program output.') -g.add_argument('--set-xlsx', help=argparse.SUPPRESS, action='store_true') # set arguments for xlsx only -g.add_argument('--xnormalize', help='Add extra sheets with normalized data in xlsx files', action='store_true') -g.add_argument('--xchart', help='Chart data in xlsx files', action='store_true') -g.add_argument('--keep', help='Keep temporary files', action='store_true') -g.add_argument('--xkeep', dest='keep', action='store_true', help=argparse.SUPPRESS) - -g = p.add_argument_group('Sampling') -g.add_argument('--show-sample', help='Show command line to rerun workload with sampling', action='store_true') -g.add_argument('--run-sample', help='Automatically rerun workload with sampling', action='store_true') -g.add_argument('--sample-args', help='Extra arguments to pass to perf record for sampling. Use + to specify -', - default='-g') -g.add_argument('--sample-repeat', - help='Repeat measurement and sampling N times. This interleaves counting and sampling. ' - 'Useful for background collection with -a sleep X.', type=int) -g.add_argument('--sample-basename', help='Base name of sample perf.data files', default="perf.data") - -g.add_argument('-d', help=argparse.SUPPRESS, action='help') # prevent passing this to perf - -p.add_argument('--version', help=argparse.SUPPRESS, action='store_true') -p.add_argument('--debug', help=argparse.SUPPRESS, action='store_true') # enable scheduler debugging -p.add_argument('--dfilter', help=argparse.SUPPRESS, action='append') -p.add_argument('--repl', action='store_true', help=argparse.SUPPRESS) # start python repl after initialization -p.add_argument('--filterquals', help=argparse.SUPPRESS, action='store_true') # remove events not supported by perf -p.add_argument('--setvar', help=argparse.SUPPRESS, action='append') # set env variable (for test suite iterating options) -p.add_argument('--tune', nargs='+', help=argparse.SUPPRESS) # override global variables with python expression -p.add_argument('--tune-model', nargs='+', help=argparse.SUPPRESS) # override global variables late with python expression -p.add_argument('--force-bn', action='append', help=argparse.SUPPRESS) # force bottleneck for testing -p.add_argument('--no-json-header', action='store_true', help=argparse.SUPPRESS) # no [ for json -p.add_argument('--no-json-footer', action='store_true', help=argparse.SUPPRESS) # no ] for json -p.add_argument('--no-csv-header', action='store_true', help=argparse.SUPPRESS) # no header/version for CSV -p.add_argument('--no-csv-footer', action='store_true', help=argparse.SUPPRESS) # no version for CSV -p.add_argument('--no-version', action='store_true', help="Don't print version") -args, rest = p.parse_known_args() -io_set_args(args) - -if args.setvar: - for j in args.setvar: - l = j.split("=") - os.environ[l[0]] = l[1] + g.add_argument('--describe', help='Print full descriptions for listed node prefixes. Add ^ to require full match.', action='store_true') + + g = p.add_argument_group('Workarounds') + g.add_argument('--no-group', help='Dont use groups', action='store_true') + g.add_argument('--force-events', help='Assume kernel supports all events. May give wrong results.', + action='store_true') + g.add_argument('--ignore-errata', help='Do not disable events with errata', action='store_true', default=True) + g.add_argument('--handle-errata', help='Disable events with errata', action='store_true') + g.add_argument('--reserved-counters', default=0, help='Assume N generic counters are used elsewhere', type=int) + + g = p.add_argument_group('Filtering output') + g.add_argument('--only-bottleneck', help='Only print topdown bottleneck and associated metrics (unless overriden with --nodes)', action='store_true') + g.add_argument('--verbose', '-v', help='Print all results even when below threshold or exceeding boundaries. ' + 'Note this can result in bogus values, as the TopDown methodology relies on thresholds ' + 'to correctly characterize workloads. Values not crossing threshold are marked with <.', + action='store_true') + + g = p.add_argument_group('Output format') + g.add_argument('--per-core', help='Aggregate output per core', action='store_true') + g.add_argument('--per-socket', help='Aggregate output per socket', action='store_true') + g.add_argument('--per-thread', help='Aggregate output per CPU thread', action='store_true') + g.add_argument('--global', help='Aggregate output for all CPUs', action='store_true', dest='global_') + g.add_argument('--no-desc', help='Do not print event descriptions', action='store_true') + g.add_argument('--desc', help='Force event descriptions', action='store_true') + g.add_argument('--csv', '-x', help='Enable CSV mode with specified delimeter') + g.add_argument('--output', '-o', help='Set output file') + g.add_argument('--split-output', help='Generate multiple output files, one for each specified ' + 'aggregation option (with -o)', + action='store_true') + g.add_argument('--graph', help='Automatically graph interval output with tl-barplot.py', + action='store_true') + g.add_argument("--graph-cpu", help="CPU to graph using --graph") + g.add_argument('--title', help='Set title of graph') + g.add_argument('-q', '--quiet', help='Avoid unnecessary status output', action='store_true') + g.add_argument('--long-desc', help='Print long descriptions instead of abbreviated ones.', + action='store_true') + g.add_argument('--columns', help='Print CPU output in multiple columns for each node', action='store_true') + g.add_argument('--json', help='Print output in JSON format for Chrome about://tracing', action='store_true') + g.add_argument('--summary', help='Print summary at the end. Only useful with -I', action='store_true') + g.add_argument('--no-area', help='Hide area column', action='store_true') + g.add_argument('--perf-output', help='Save perf stat output in specified file') + g.add_argument('--perf-summary', help='Save summarized perf stat output in specified file') + g.add_argument('--no-perf', help=argparse.SUPPRESS, action='store_true') # noop, for compatibility + g.add_argument('--perf', help='Print perf command line', action='store_true') + g.add_argument('--print', help="Only print perf command line. Don't run", action='store_true') + g.add_argument('--idle-threshold', help="Hide idle CPUs (default <5%% of busiest if not CSV, specify percent)", + default=None, type=float) + g.add_argument('--no-output', help="Don't print computed output. Does not affect --summary.", action='store_true') + g.add_argument('--no-mux', help="Don't print mux statistics", action="store_true") + g.add_argument('--abbrev', help="Abbreviate node names in output", action="store_true") + g.add_argument('--no-sort', help="Don't sort output by Metric group", action="store_true") + + g = p.add_argument_group('Environment') + g.add_argument('--force-cpu', help='Force CPU type', choices=[x[0] for x in known_cpus]) + g.add_argument('--force-topology', metavar='findsysoutput', help='Use specified topology file (find /sys/devices)') + g.add_argument('--force-cpuinfo', metavar='cpuinfo', help='Use specified cpuinfo file (/proc/cpuinfo)') + g.add_argument('--force-hypervisor', help='Assume running under hypervisor (no uncore, no offcore, no PEBS)', + action='store_true') + g.add_argument('--no-uncore', help='Disable uncore events', action='store_true') + g.add_argument('--no-check', help='Do not check that PMU units exist', action='store_true') + + g = p.add_argument_group('Additional information') + g.add_argument('--print-group', '-g', help='Print event group assignments', + action='store_true') + g.add_argument('--raw', help="Print raw values", action='store_true') + g.add_argument('--valcsv', '-V', help='Write raw counter values into CSV file') + g.add_argument('--stats', help='Show statistics on what events counted', action='store_true') + + g = p.add_argument_group('xlsx output') + g.add_argument('--xlsx', help='Generate xlsx spreadsheet output with data for ' + 'socket/global/thread/core/summary/raw views with 1s interval. ' + 'Add --single-thread to only get program output.') + g.add_argument('--set-xlsx', help=argparse.SUPPRESS, action='store_true') # set arguments for xlsx only + g.add_argument('--xnormalize', help='Add extra sheets with normalized data in xlsx files', action='store_true') + g.add_argument('--xchart', help='Chart data in xlsx files', action='store_true') + g.add_argument('--keep', help='Keep temporary files', action='store_true') + g.add_argument('--xkeep', dest='keep', action='store_true', help=argparse.SUPPRESS) + + g = p.add_argument_group('Sampling') + g.add_argument('--show-sample', help='Show command line to rerun workload with sampling', action='store_true') + g.add_argument('--run-sample', help='Automatically rerun workload with sampling', action='store_true') + g.add_argument('--sample-args', help='Extra arguments to pass to perf record for sampling. Use + to specify -', + default='-g') + g.add_argument('--sample-repeat', + help='Repeat measurement and sampling N times. This interleaves counting and sampling. ' + 'Useful for background collection with -a sleep X.', type=int) + g.add_argument('--sample-basename', help='Base name of sample perf.data files', default="perf.data") + + g.add_argument('-d', help=argparse.SUPPRESS, action='help') # prevent passing this to perf + + p.add_argument('--version', help=argparse.SUPPRESS, action='store_true') + p.add_argument('--debug', help=argparse.SUPPRESS, action='store_true') # enable scheduler debugging + p.add_argument('--dfilter', help=argparse.SUPPRESS, action='append') + p.add_argument('--repl', action='store_true', help=argparse.SUPPRESS) # start python repl after initialization + p.add_argument('--filterquals', help=argparse.SUPPRESS, action='store_true') # remove events not supported by perf + p.add_argument('--setvar', help=argparse.SUPPRESS, action='append') # set env variable (for test suite iterating options) + p.add_argument('--tune', nargs='+', help=argparse.SUPPRESS) # override global variables with python expression + p.add_argument('--tune-model', nargs='+', help=argparse.SUPPRESS) # override global variables late with python expression + p.add_argument('--force-bn', action='append', help=argparse.SUPPRESS) # force bottleneck for testing + p.add_argument('--no-json-header', action='store_true', help=argparse.SUPPRESS) # no [ for json + p.add_argument('--no-json-footer', action='store_true', help=argparse.SUPPRESS) # no ] for json + p.add_argument('--no-csv-header', action='store_true', help=argparse.SUPPRESS) # no header/version for CSV + p.add_argument('--no-csv-footer', action='store_true', help=argparse.SUPPRESS) # no version for CSV + p.add_argument('--no-version', action='store_true', help="Don't print version") + args, rest = p.parse_known_args() + io_set_args(args) + if args.setvar: + for j in args.setvar: + l = j.split("=") + os.environ[l[0]] = l[1] + return args, rest + +args, rest = handle_args() def output_count(): return args.per_core + args.global_ + args.per_thread + args.per_socket @@ -956,15 +959,17 @@ def run_parallel(args, env): # XXX graph return ret -if args.idle_threshold: - idle_threshold = args.idle_threshold / 100. -elif args.csv or args.xlsx or args.set_xlsx: # not for args.graph - idle_threshold = 0 # avoid breaking programs that rely on the CSV output -else: - idle_threshold = 0.05 -if args.exclusive and args.pinned: - sys.exit("--exclusive and --pinned cannot be combined") +def init_idle_threshold(): + if args.idle_threshold: + idle_threshold = args.idle_threshold / 100. + elif args.csv or args.xlsx or args.set_xlsx: # not for args.graph + idle_threshold = 0 # avoid breaking programs that rely on the CSV output + else: + idle_threshold = 0.05 + return idle_threshold + +idle_threshold = init_idle_threshold() event_nocheck = args.import_ or args.no_check @@ -996,110 +1001,131 @@ def gen_cpu_name(cpu): env = tl_cpu.Env() -if args.force_cpu: - env.forcecpu = args.force_cpu - cpuname = gen_cpu_name(args.force_cpu) - if not os.getenv("EVENTMAP"): - os.environ["EVENTMAP"] = cpuname - if not os.getenv("UNCORE"): - os.environ["UNCORE"] = cpuname -if args.force_topology: - if not os.getenv("TOPOLOGY"): - os.environ["TOPOLOGY"] = args.force_topology - ocperf.topology = None # force reread -if args.force_cpuinfo: - env.cpuinfo = args.force_cpuinfo -if args.force_hypervisor: - env.hypervisor = True - -if args.parallel: - if not args.import_: - sys.exit("--parallel requires --import") - if args.import_.endswith(".xz") or args.import_.endswith(".gz"): - sys.exit("Uncompress input file first") # XXX - if args.perf_summary: - sys.exit("--parallel does not support --perf-summary") # XXX - if args.subset: - # XXX support sample - sys.exit("--parallel does not support --subset") - if args.json and multi_output() and not args.split_output: - sys.exit("--parallel does not support multi-output --json without --split-output") - if args.graph: - sys.exit("--parallel does not support --graph") # XXX - if args.pjobs == 0: - import multiprocessing - args.pjobs = multiprocessing.cpu_count() - sys.exit(run_parallel(args, env)) - -if rest[:1] == ["--"]: - rest = rest[1:] - -if args.cpu: - rest = ["--cpu", args.cpu] + rest -if args.pid: - rest = ["--pid", args.pid] + rest -if args.csv and len(args.csv) != 1: - sys.exit("--csv/-x argument can be only a single character") - -if args.xlsx: - init_xlsx(args) -if args.set_xlsx: - set_xlsx(args) +def update_args(): + if args.force_cpu: + env.forcecpu = args.force_cpu + cpuname = gen_cpu_name(args.force_cpu) + if not os.getenv("EVENTMAP"): + os.environ["EVENTMAP"] = cpuname + if not os.getenv("UNCORE"): + os.environ["UNCORE"] = cpuname + if args.force_topology: + if not os.getenv("TOPOLOGY"): + os.environ["TOPOLOGY"] = args.force_topology + ocperf.topology = None # force reread + if args.force_cpuinfo: + env.cpuinfo = args.force_cpuinfo + if args.force_hypervisor: + env.hypervisor = True + if args.sample_repeat: + args.run_sample = True + if args.handle_errata: + args.ignore_errata = False + if args.exclusive and args.pinned: + sys.exit("--exclusive and --pinned cannot be combined") + +update_args() + +def handle_parallel(): + if args.parallel: + if not args.import_: + sys.exit("--parallel requires --import") + if args.import_.endswith(".xz") or args.import_.endswith(".gz"): + sys.exit("Uncompress input file first") # XXX + if args.perf_summary: + sys.exit("--parallel does not support --perf-summary") # XXX + if args.subset: + # XXX support sample + sys.exit("--parallel does not support --subset") + if args.json and multi_output() and not args.split_output: + sys.exit("--parallel does not support multi-output --json without --split-output") + if args.graph: + sys.exit("--parallel does not support --graph") # XXX + if args.pjobs == 0: + import multiprocessing + args.pjobs = multiprocessing.cpu_count() + sys.exit(run_parallel(args, env)) + +handle_parallel() + +def handle_rest(rest): + if rest[:1] == ["--"]: + rest = rest[1:] + if args.cpu: + rest = ["--cpu", args.cpu] + rest + if args.pid: + rest = ["--pid", args.pid] + rest + if args.csv and len(args.csv) != 1: + sys.exit("--csv/-x argument can be only a single character") + + if args.xlsx: + init_xlsx(args) + if args.set_xlsx: + set_xlsx(args) + return rest + +rest = handle_rest(rest) open_output_files() -if args.perf_summary: - try: - args.perf_summary = flex_open_w(args.perf_summary) - except IOError as e: - sys.exit("Cannot open perf summary file %s: %s" % (args.perf_summary, e)) - # XXX force no_uncore because the resulting file cannot be imported otherwise? - -if args.all: - args.tsx = True - args.power = True - args.sw = True - args.metrics = True - args.frequency = True - args.level = 6 - -if args.only_bottleneck: - args.quiet = True - args.no_version = True - -if args.graph: - if not args.interval: - args.interval = 100 - extra = "" - if args.title: - title = args.title - else: - title = "cpu %s" % (args.graph_cpu if args.graph_cpu else 0) - extra += '--title "' + title + '" ' - if args.split_output: - sys.exit("--split-output not allowed with --graph") - if args.output: - extra += '--output "' + args.output + '" ' - if args.graph_cpu: - extra += "--cpu " + args.graph_cpu + " " - args.csv = ',' - cmd = "%s %s/tl-barplot.py %s /dev/stdin" % (sys.executable, exe_dir(), extra) - if not args.quiet: - print(cmd) - graphp = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, **popentext) - args.output = graphp.stdin +def update_args2(): + if args.perf_summary: + try: + args.perf_summary = flex_open_w(args.perf_summary) + except IOError as e: + sys.exit("Cannot open perf summary file %s: %s" % (args.perf_summary, e)) + # XXX force no_uncore because the resulting file cannot be imported otherwise? + + if args.all: + args.tsx = True + args.power = True + args.sw = True + args.metrics = True + args.frequency = True + args.level = 6 + + if args.only_bottleneck: + args.quiet = True + args.no_version = True + +update_args2() + +def handle_graph(): + graphp = None + if args.graph: + if not args.interval: + args.interval = 100 + extra = "" + if args.title: + title = args.title + else: + title = "cpu %s" % (args.graph_cpu if args.graph_cpu else 0) + extra += '--title "' + title + '" ' + if args.split_output: + sys.exit("--split-output not allowed with --graph") + if args.output: + extra += '--output "' + args.output + '" ' + if args.graph_cpu: + extra += "--cpu " + args.graph_cpu + " " + args.csv = ',' + cmd = "%s %s/tl-barplot.py %s /dev/stdin" % (sys.executable, exe_dir(), extra) + if not args.quiet: + print(cmd) + graphp = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, **popentext) + args.output = graphp.stdin + return graphp -if args.sample_repeat: - args.run_sample = True +graphp = handle_graph() -if args.handle_errata: - args.ignore_errata = False +def init_ring_filter(): + ring_filter = "" + if args.kernel and not args.user: + ring_filter = 'k' + if args.user and not args.kernel: + ring_filter = 'u' + return ring_filter -ring_filter = "" -if args.kernel and not args.user: - ring_filter = 'k' -if args.user and not args.kernel: - ring_filter = 'u' +ring_filter = init_ring_filter() MAX_ERROR = 0.05 @@ -1111,45 +1137,46 @@ def check_ratio(l): # XXX move into ectx cpu = tl_cpu.CPU(known_cpus, nocheck=event_nocheck, env=env) -if args.level < 0: - if args.bottlenecks: - args.level = 4 - else: - args.level = 2 if any([x >= 8 for x in cpu.counters.values()]) else 1 - -if args.show_cpu: - print("%s %s %s" % (cpu.true_name, cpu.pmu_name, cpu.name)) - sys.exit(0) - -desired_cpu = args.force_cpu if args.force_cpu else cpu.model -if desired_cpu in eventlist_alias: - r = eventlist_alias[desired_cpu] - if not os.getenv("EVENTMAP"): - os.environ["EVENTMAP"] = r - if not os.getenv("UNCORE"): - os.environ["UNCORE"] = r - -if cpu.pmu_name and cpu.pmu_name.startswith("generic") and not args.quiet: - print("warning: kernel is in architectural mode and might mismeasure events", file=sys.stderr) - print("Consider a kernel update. See https://github.com/andikleen/pmu-tools/wiki/toplev-kernel-support", file=sys.stderr) - if cpu.cpu in hybrid_cpus: - sys.exit("Hybrid %s not supported in architectural mode" % cpu.cpu) - -if args.xlsx and not forced_per_socket and cpu.sockets == 1: - args.per_socket = False -if args.xlsx and not forced_per_core and cpu.threads == 1: - args.per_core = False - -if cpu.hypervisor: - feat.max_precise = 0 - feat.has_max_precise = True - -if not pversion.has_uncore_expansion: - # XXX reenable power - args.no_uncore = True - -if cpu.hypervisor or args.no_uncore: - feat.supports_power = False +def update_args_cpu(): + if args.level < 0: + if args.bottlenecks: + args.level = 4 + else: + args.level = 2 if any([x >= 8 for x in cpu.counters.values()]) else 1 + if args.show_cpu: + print("%s %s %s" % (cpu.true_name, cpu.pmu_name, cpu.name)) + sys.exit(0) + desired_cpu = args.force_cpu if args.force_cpu else cpu.model + if desired_cpu in eventlist_alias: + r = eventlist_alias[desired_cpu] + if not os.getenv("EVENTMAP"): + os.environ["EVENTMAP"] = r + if not os.getenv("UNCORE"): + os.environ["UNCORE"] = r + + if cpu.pmu_name and cpu.pmu_name.startswith("generic") and not args.quiet: + print("warning: kernel is in architectural mode and might mismeasure events", file=sys.stderr) + print("Consider a kernel update. See https://github.com/andikleen/pmu-tools/wiki/toplev-kernel-support", file=sys.stderr) + if cpu.cpu in hybrid_cpus: + sys.exit("Hybrid %s not supported in architectural mode" % cpu.cpu) + + if args.xlsx and not forced_per_socket and cpu.sockets == 1: + args.per_socket = False + if args.xlsx and not forced_per_core and cpu.threads == 1: + args.per_core = False + + if cpu.hypervisor: + feat.max_precise = 0 + feat.has_max_precise = True + + if not pversion.has_uncore_expansion: + # XXX reenable power + args.no_uncore = True + + if cpu.hypervisor or args.no_uncore: + feat.supports_power = False + +update_args_cpu() def print_perf(r): if not (args.perf or args.print): @@ -3721,32 +3748,43 @@ def sysctl(name): return 0 return val -# check nmi watchdog -# XXX need to get this state from CSV import -if sysctl("kernel.nmi_watchdog") != 0 or os.getenv("FORCE_NMI_WATCHDOG"): - # XXX should probe if nmi watchdog runs on fixed or generic counter +def update_cpu(): + # check nmi watchdog + # XXX need to get this state from CSV import + if sysctl("kernel.nmi_watchdog") != 0 or os.getenv("FORCE_NMI_WATCHDOG"): + # XXX should probe if nmi watchdog runs on fixed or generic counter + for j in cpu.counters.keys(): + cpu.counters[j] -= 1 # FIXME + if not args.quiet and not args.import_: + print("Consider disabling nmi watchdog to minimize multiplexing", file=sys.stderr) + print("(echo 0 | sudo tee /proc/sys/kernel/nmi_watchdog or\n echo kernel.nmi_watchdog=0 >> /etc/sysctl.conf ; sysctl -p as root)", file=sys.stderr) + for j in cpu.counters.keys(): - cpu.counters[j] -= 1 # FIXME - if not args.quiet and not args.import_: - print("Consider disabling nmi watchdog to minimize multiplexing", file=sys.stderr) - print("(echo 0 | sudo tee /proc/sys/kernel/nmi_watchdog or\n echo kernel.nmi_watchdog=0 >> /etc/sysctl.conf ; sysctl -p as root)", file=sys.stderr) + cpu.counters[j] -= args.reserved_counters + + if cpu.cpu is None: + sys.exit("Unsupported CPU model %s %d" % (cpu.vendor, cpu.model,)) -for j in cpu.counters.keys(): - cpu.counters[j] -= args.reserved_counters +update_cpu() -if cpu.cpu is None: - sys.exit("Unsupported CPU model %s %d" % (cpu.vendor, cpu.model,)) +def get_kernel(): + kv = os.getenv("KERNEL_VERSION") + if not kv: + kv = platform.release() + return kv_to_key(list(map(int, kv.split(".")[:2]))) -kv = os.getenv("KERNEL_VERSION") -if not kv: - kv = platform.release() -kernel_version = kv_to_key(list(map(int, kv.split(".")[:2]))) +kernel_version = get_kernel() -if args.exclusive: - if kernel_version < 510: - sys.exit("--exclusive needs kernel 5.10+") - metrics_own_group = False - run_l1_parallel = False +def check_exclusive(): + if args.exclusive: + if kernel_version < 510: + sys.exit("--exclusive needs kernel 5.10+") + global metrics_own_group + metrics_own_group = False + global run_l1_parallel + run_l1_parallel = False + +check_exclusive() def ht_warning(): if cpu.ht and not args.quiet: @@ -3839,29 +3877,23 @@ def init_runner_list(): runner_list = init_runner_list() -pe = lambda x: None -if args.debug: - printed_error = set() - def print_err(x): - if x not in printed_error: - print(x) - printed_error.add(x) - pe = lambda e: print_err(e) +def handle_more_options(): + if args.single_thread: + cpu.ht = False -if args.single_thread: - cpu.ht = False + if args.quiet: + if not args.desc: + args.no_desc = True + args.no_util = True -if args.quiet: - if not args.desc: - args.no_desc = True - args.no_util = True +handle_more_options() def tune_model(model): if args.tune_model: for t in args.tune_model: exec(t) -def init_model(model, runner): +def init_model(model, runner, pe): version = model.version model.print_error = pe model.check_event = lambda ev: ectx.emap.getevent(ev) is not None @@ -3903,7 +3935,7 @@ def legacy_smt_setup(model): model.smt_enabled = cpu.ht smt_mode |= cpu.ht -def model_setup(runner, cpuname): +def model_setup(runner, cpuname, pe): global smt_mode if cpuname == "ivb": import ivb_client_ratios @@ -4021,9 +4053,9 @@ def model_setup(runner, cpuname): import simple_ratios model = simple_ratios - return init_model(model, runner) + return init_model(model, runner, pe) -def runner_emaps(): +def runner_emaps(pe): version = "" for runner in runner_list: runner.set_ectx() @@ -4036,24 +4068,38 @@ def runner_emaps(): (os.environ["EVENTMAP"] if "EVENTMAP" in os.environ else "?", cpu.model)) if version: version += ", " - version += model_setup(runner, cpu.cpu) + version += model_setup(runner, cpu.cpu, pe) runner.clear_ectx() return version -version = runner_emaps() +def setup_pe(): + pe = lambda x: None + if args.debug: + printed_error = set() + def print_err(x): + if x not in printed_error: + print(x) + printed_error.add(x) + pe = lambda e: print_err(e) + return pe + +version = runner_emaps(setup_pe()) + +def handle_misc_options(): + if args.version: + print("toplev, CPU: %s, TMA version: %s" % (cpu.cpu, version)) + sys.exit(0) -if args.version: - print("toplev, CPU: %s, TMA version: %s" % (cpu.cpu, version)) - sys.exit(0) + if args.gen_script: + args.quiet = True -if args.gen_script: - args.quiet = True + if args.subset: + if not args.import_: + sys.exit("--subset requires --import mode") + if args.script_record: + sys.exit("--subset cannot be used with --script-record. Generate temp file with perf stat report -x\\;") -if args.subset: - if not args.import_: - sys.exit("--subset requires --import mode") - if args.script_record: - sys.exit("--subset cannot be used with --script-record. Generate temp file with perf stat report -x\\;") +handle_misc_options() def handle_cmd(): if args.describe: @@ -4153,64 +4199,81 @@ def runner_filter(rest): rest = runner_filter(rest) -if not smt_mode and not args.single_thread and not args.no_aggr: - hybrid = cpu.cpu in hybrid_cpus - multi = output_count() - if multi > 0: - rest = add_args(rest, "-a") - if (multi > 1 or args.per_thread) and not hybrid: - args.no_aggr = True - if args.per_socket and multi == 1 and not hybrid: - rest = add_args(rest, "--per-socket") - if args.per_core and multi == 1 and not hybrid: - rest = add_args(rest, "--per-core") +def update_smt(rest): + if not smt_mode and not args.single_thread and not args.no_aggr: + hybrid = cpu.cpu in hybrid_cpus + multi = output_count() + if multi > 0: + rest = add_args(rest, "-a") + if (multi > 1 or args.per_thread) and not hybrid: + args.no_aggr = True + if args.per_socket and multi == 1 and not hybrid: + rest = add_args(rest, "--per-socket") + if args.per_core and multi == 1 and not hybrid: + rest = add_args(rest, "--per-core") + return rest + +rest = update_smt(rest) def runner_node_filter(): for r in runner_list: r.filter_nodes() runner_node_filter() + +def update_smt_mode(): + if smt_mode and not os.getenv('FORCEHT'): + # do not need SMT mode if no objects have Core scope + if not any_core_node(): + return False + return smt_mode + orig_smt_mode = smt_mode -if smt_mode and not os.getenv('FORCEHT'): - # do not need SMT mode if no objects have Core scope - if not any_core_node(): - smt_mode = False - -full_system = False -if not args.single_thread and smt_mode: - if not args.quiet and not args.import_: - print("Will measure complete system.") - if smt_mode: - if args.cpu: - print("Warning: --cpu/-C mode with HyperThread must specify all core thread pairs!", - file=sys.stderr) - if args.pid: - sys.exit("-p/--pid mode not compatible with SMT. Use sleep in global mode.") - check_root() - rest = add_args(rest, "-a") - args.no_aggr = True - full_system = True -else: - full_system = args.no_aggr or "--per-core" in rest or "--per-socket" in rest - -if args.no_aggr: - rest = add_args(rest, "-A") +smt_mode = update_smt_mode() + +def check_full_system(rest): + full_system = False + if not args.single_thread and smt_mode: + if not args.quiet and not args.import_: + print("Will measure complete system.") + if smt_mode: + if args.cpu: + print("Warning: --cpu/-C mode with HyperThread must specify all core thread pairs!", + file=sys.stderr) + if args.pid: + sys.exit("-p/--pid mode not compatible with SMT. Use sleep in global mode.") + check_root() + rest = add_args(rest, "-a") + args.no_aggr = True + full_system = True + else: + full_system = args.no_aggr or "--per-core" in rest or "--per-socket" in rest + if args.no_aggr: + rest = add_args(rest, "-A") + return full_system, rest + +full_system, rest = check_full_system(rest) output_numcpus = False -if (args.perf_output or args.perf_summary) and not args.no_csv_header: - ph = [] - if args.interval: - ph.append("Timestamp") - if full_system: - ph.append("Location") - if ("--per-socket" in rest or "--per-core" in rest) and not args.no_aggr: - ph.append("Num-CPUs") - output_numcpus = True - ph += ["Value", "Unit", "Event", "Run-Time", "Enabled", "", ""] - if args.perf_output: - args.perf_output.write(";".join(ph) + "\n") - if args.perf_summary: - args.perf_summary.write(";".join(ph) + "\n") + +def init_perf_output(): + if (args.perf_output or args.perf_summary) and not args.no_csv_header: + ph = [] + if args.interval: + ph.append("Timestamp") + if full_system: + ph.append("Location") + if ("--per-socket" in rest or "--per-core" in rest) and not args.no_aggr: + ph.append("Num-CPUs") + global output_numcpus + output_numcpus = True + ph += ["Value", "Unit", "Event", "Run-Time", "Enabled", "", ""] + if args.perf_output: + args.perf_output.write(";".join(ph) + "\n") + if args.perf_summary: + args.perf_summary.write(";".join(ph) + "\n") + +init_perf_output() def setup_cpus(rest, cpu): if args.cpu: @@ -4244,27 +4307,34 @@ def setup_cpus(rest, cpu): if args.pinned: run_l1_parallel = True -if args.json: - if args.csv: - sys.exit("Cannot combine --csv with --json") - if args.columns: - sys.exit("Cannot combine --columns with --json") - out = tl_output.OutputJSON(args.output, args.csv, args, version, cpu) -elif args.csv: - if args.columns: - out = tl_output.OutputColumnsCSV(args.output, args.csv, args, version, cpu) +def init_output(): + if args.json: + if args.csv: + sys.exit("Cannot combine --csv with --json") + if args.columns: + sys.exit("Cannot combine --columns with --json") + out = tl_output.OutputJSON(args.output, args.csv, args, version, cpu) + elif args.csv: + if args.columns: + out = tl_output.OutputColumnsCSV(args.output, args.csv, args, version, cpu) + else: + out = tl_output.OutputCSV(args.output, args.csv, args, version, cpu) + elif args.columns: + out = tl_output.OutputColumns(args.output, args, version, cpu) else: - out = tl_output.OutputCSV(args.output, args.csv, args, version, cpu) -elif args.columns: - out = tl_output.OutputColumns(args.output, args, version, cpu) -else: - out = tl_output.OutputHuman(args.output, args, version, cpu) - -if args.valcsv: - out.valcsv = csv.writer(args.valcsv, lineterminator='\n', delimiter=';') - if not args.no_csv_header: - out.valcsv.writerow(("Timestamp", "CPU", "Group", "Event", "Value", - "Perf-event", "Index", "STDDEV", "MULTI", "Nodes")) + out = tl_output.OutputHuman(args.output, args, version, cpu) + return out + +out = init_output() + +def init_valcsv(): + if args.valcsv: + out.valcsv = csv.writer(args.valcsv, lineterminator='\n', delimiter=';') + if not args.no_csv_header: + out.valcsv.writerow(("Timestamp", "CPU", "Group", "Event", "Value", + "Perf-event", "Index", "STDDEV", "MULTI", "Nodes")) + +init_valcsv() # XXX use runner_restart def runner_first_init(): @@ -4369,14 +4439,18 @@ def report_not_supported(runner_list): code.interact(banner='toplev repl', local=locals()) sys.exit(0) -if args.sample_repeat: - cnt = 1 - for j in range(args.sample_repeat): - ret, cnt = measure_and_sample(runner_list, cnt) - if ret: - break -else: - ret, count = measure_and_sample(runner_list, 0 if args.drilldown else None) +def measure(): + if args.sample_repeat: + cnt = 1 + for j in range(args.sample_repeat): + ret, cnt = measure_and_sample(runner_list, cnt) + if ret: + break + else: + ret, count = measure_and_sample(runner_list, 0 if args.drilldown else None) + return ret + +ret = measure() out.print_footer() out.flushfiles() @@ -4398,7 +4472,11 @@ def get_range(g): report_idle(runner_list) report_not_supported(runner_list) -if args.graph: - args.output.close() - graphp.wait() +def finish_graph(graphp): + if args.graph: + args.output.close() + graphp.wait() + +finish_graph(graphp) + sys.exit(ret)