def _parse_stat_value(raw_value: str) -> Optional[Union[int, float]]:
  """Converts the text of one stats CSV cell into a number.

  Args:
    raw_value: the cell text as produced by csv.reader.

  Returns:
    An int when the text is an integer literal, a float when it is any other
    numeric literal, or None when the cell is empty or not numeric.
  """
  text = raw_value.strip()
  if not text:
    return None
  # Try int first so integral counters keep an integer type; anything else
  # numeric (decimal point, exponent form) falls back to float.
  try:
    return int(text)
  except ValueError:
    pass
  try:
    return float(text)
  except ValueError:
    return None


def _parse_centipede_stats(
    stats_file: str) -> Optional[Dict[str, Union[int, float]]]:
  """Parses the Centipede stats file and returns a dictionary with labels
  and their respective values.

  Only the header row and the last data row are used. Columns are paired
  positionally, so a trailing comma (which yields an empty header cell), a
  last row shorter than the header, or an empty cell is skipped rather than
  raising.

  Args:
    stats_file: the path to Centipede stats file.

  Returns:
    a dictionary mapping each column label to the numeric value found in the
    last row, or None if the file is missing or has no data row.
  """
  if not os.path.exists(stats_file):
    return None
  with open(stats_file, 'r') as statsfile:
    # csv.reader yields an empty list for a blank line; drop those so that a
    # trailing blank line is never mistaken for the latest stats row.
    rows = [row for row in csv.reader(statsfile) if row]
  # If the binary could not run at all, the file will be empty or with only
  # the column description line.
  if len(rows) <= 1:
    return None

  header, latest = rows[0], rows[-1]
  stats = {}
  # zip() stops at the shorter of the two rows, so a truncated last row cannot
  # index past its end, and no real column is dropped when the header has no
  # trailing comma.
  for label, raw_value in zip(header, latest):
    if not label:
      continue
    value = _parse_stat_value(raw_value)
    if value is not None:
      stats[label] = value
  return stats


def _parse_centipede_logs(log_lines: List[str]) -> Dict[str, int]:
  """Parses Centipede outputs and generates stats for it.

  Each line is classified at most once, checked in this order: timeout,
  out-of-memory, then crash. The counters are 0/1 flags for the run rather
  than occurrence counts. 'leak_count' is reported but never set here.

  Args:
    log_lines: the log lines.

  Returns:
    the stats, keyed by 'crash_count', 'timeout_count', 'oom_count' and
    'leak_count'.
  """
  stats = {
      'crash_count': 0,
      'timeout_count': 0,
      'oom_count': 0,
      'leak_count': 0,
  }
  for line in log_lines:
    if re.search(stacktraces_constants.CENTIPEDE_TIMEOUT_REGEX, line):
      stats['timeout_count'] = 1
    elif re.search(stacktraces_constants.OUT_OF_MEMORY_REGEX, line):
      stats['oom_count'] = 1
    elif re.search(CRASH_REGEX, line):
      stats['crash_count'] = 1
  return stats
# BigQuery columns for Centipede fuzzing stats. Every column is NULLABLE; the
# pairs below give the column name and its type, in the order the columns are
# declared. The engine-agnostic columns are appended at the end.
_CENTIPEDE_SCHEMA = [{
    'mode': 'NULLABLE',
    'name': column_name,
    'type': column_type,
} for column_name, column_type in (
    ('NumCoveredPcs_Min', 'INTEGER'),
    ('NumCoveredPcs_Max', 'INTEGER'),
    ('NumCoveredPcs_Avg', 'FLOAT'),
    ('NumExecs_Min', 'INTEGER'),
    ('NumExecs_Max', 'INTEGER'),
    ('NumExecs_Avg', 'FLOAT'),
    ('ActiveCorpusSize_Min', 'INTEGER'),
    ('ActiveCorpusSize_Max', 'INTEGER'),
    ('ActiveCorpusSize_Avg', 'FLOAT'),
    ('MaxEltSize_Min', 'INTEGER'),
    ('MaxEltSize_Max', 'INTEGER'),
    ('MaxEltSize_Avg', 'FLOAT'),
    ('AvgEltSize_Min', 'INTEGER'),
    ('AvgEltSize_Max', 'INTEGER'),
    ('AvgEltSize_Avg', 'FLOAT'),
    ('UnixMicros_Min', 'INTEGER'),
    ('UnixMicros_Max', 'INTEGER'),
    ('FuzzTimeSec_Min', 'INTEGER'),
    ('FuzzTimeSec_Max', 'INTEGER'),
    ('FuzzTimeSec_Avg', 'FLOAT'),
    ('NumProxyCrashes_Min', 'INTEGER'),
    ('NumProxyCrashes_Max', 'INTEGER'),
    ('NumProxyCrashes_Sum', 'INTEGER'),
    ('TotalCorpusSize_Min', 'INTEGER'),
    ('TotalCorpusSize_Max', 'INTEGER'),
    ('TotalCorpusSize_Sum', 'INTEGER'),
    ('Num8BitCounterFts_Min', 'INTEGER'),
    ('Num8BitCounterFts_Max', 'INTEGER'),
    ('Num8BitCounterFts_Avg', 'FLOAT'),
    ('NumDataFlowFts_Min', 'INTEGER'),
    ('NumDataFlowFts_Max', 'INTEGER'),
    ('NumDataFlowFts_Avg', 'FLOAT'),
    ('NumCmpFts_Min', 'INTEGER'),
    ('NumCmpFts_Max', 'INTEGER'),
    ('NumCmpFts_Avg', 'FLOAT'),
    ('NumCallStackFts_Min', 'INTEGER'),
    ('NumCallStackFts_Max', 'INTEGER'),
    ('NumCallStackFts_Avg', 'FLOAT'),
    ('NumBoundedPathFts_Min', 'INTEGER'),
    ('NumBoundedPathFts_Max', 'INTEGER'),
    ('NumBoundedPathFts_Avg', 'FLOAT'),
    ('NumPcPairFts_Min', 'INTEGER'),
    ('NumPcPairFts_Max', 'INTEGER'),
    ('NumPcPairFts_Avg', 'FLOAT'),
    ('NumUserFts_Min', 'INTEGER'),
    ('NumUserFts_Max', 'INTEGER'),
    ('NumUserFts_Avg', 'FLOAT'),
    ('NumUnknownFts_Min', 'INTEGER'),
    ('NumUnknownFts_Max', 'INTEGER'),
    ('NumUnknownFts_Avg', 'FLOAT'),
    ('NumFuncsInFrontier_Min', 'INTEGER'),
    ('NumFuncsInFrontier_Max', 'INTEGER'),
    ('NumFuncsInFrontier_Avg', 'FLOAT'),
    ('EngineRusageAvgCores_Max', 'INTEGER'),
    ('EngineRusageCpuPct_Max', 'INTEGER'),
    ('EngineRusageRssMb_Max', 'INTEGER'),
    ('EngineRusageVSizeMb_Max', 'INTEGER'),
    ('NumUser0Fts_Min', 'INTEGER'),
    ('NumUser0Fts_Max', 'INTEGER'),
    ('NumUser0Fts_Avg', 'FLOAT'),
    ('NumUser1Fts_Min', 'INTEGER'),
    ('NumUser1Fts_Max', 'INTEGER'),
    ('NumUser1Fts_Avg', 'FLOAT'),
    ('NumUser2Fts_Min', 'INTEGER'),
    ('NumUser2Fts_Max', 'INTEGER'),
    ('NumUser2Fts_Avg', 'FLOAT'),
    ('NumUser3Fts_Min', 'INTEGER'),
    ('NumUser3Fts_Max', 'INTEGER'),
    ('NumUser3Fts_Avg', 'FLOAT'),
    ('NumUser4Fts_Min', 'INTEGER'),
    ('NumUser4Fts_Max', 'INTEGER'),
    ('NumUser4Fts_Avg', 'FLOAT'),
    ('NumUser5Fts_Min', 'INTEGER'),
    ('NumUser5Fts_Max', 'INTEGER'),
    ('NumUser5Fts_Avg', 'FLOAT'),
    ('NumUser6Fts_Min', 'INTEGER'),
    ('NumUser6Fts_Max', 'INTEGER'),
    ('NumUser6Fts_Avg', 'FLOAT'),
    ('NumUser7Fts_Min', 'INTEGER'),
    ('NumUser7Fts_Max', 'INTEGER'),
    ('NumUser7Fts_Avg', 'FLOAT'),
    ('NumUser8Fts_Min', 'INTEGER'),
    ('NumUser8Fts_Max', 'INTEGER'),
    ('NumUser8Fts_Avg', 'FLOAT'),
    ('NumUser9Fts_Min', 'INTEGER'),
    ('NumUser9Fts_Max', 'INTEGER'),
    ('NumUser9Fts_Avg', 'FLOAT'),
    ('NumUser10Fts_Min', 'INTEGER'),
    ('NumUser10Fts_Max', 'INTEGER'),
    ('NumUser10Fts_Avg', 'FLOAT'),
    ('NumUser11Fts_Min', 'INTEGER'),
    ('NumUser11Fts_Max', 'INTEGER'),
    ('NumUser11Fts_Avg', 'FLOAT'),
    ('NumUser12Fts_Min', 'INTEGER'),
    ('NumUser12Fts_Max', 'INTEGER'),
    ('NumUser12Fts_Avg', 'FLOAT'),
    ('NumUser13Fts_Min', 'INTEGER'),
    ('NumUser13Fts_Max', 'INTEGER'),
    ('NumUser13Fts_Avg', 'FLOAT'),
    ('NumUser14Fts_Min', 'INTEGER'),
    ('NumUser14Fts_Max', 'INTEGER'),
    ('NumUser14Fts_Avg', 'FLOAT'),
    ('NumUser15Fts_Min', 'INTEGER'),
    ('NumUser15Fts_Max', 'INTEGER'),
    ('NumUser15Fts_Avg', 'FLOAT'),
    ('crash_count', 'INTEGER'),
    ('oom_count', 'INTEGER'),
    ('timeout_count', 'INTEGER'),
    ('leak_count', 'INTEGER'),
    ('expected_duration', 'INTEGER'),
    ('actual_duration', 'INTEGER'),
    ('fuzzing_time_percent', 'FLOAT'),
)] + _COMMON_COLUMNS

# Maps each engine name to the schema of its stats table.
_SCHEMA = {
    'afl': _AFL_SCHEMA,
    'honggfuzz': _HONGGFUZZ_SCHEMA,
    'libFuzzer': _LIBFUZZER_SCHEMA,
    'centipede': _CENTIPEDE_SCHEMA,
}
def __init__(self):
  """Sets the Centipede defaults: name, key id and stats column definitions.

  `stats_column_descriptions` holds one `column: "description"` line per
  displayed stats column; `stats_columns` holds the matching comma-separated
  query expressions, each aliased to the column it feeds.
  """
  super().__init__()
  self.name = 'centipede'
  self.key_id = 1342
  # Use single quotes since the string ends in a double quote.
  # pylint: disable=line-too-long
  self.stats_column_descriptions = '''fuzzer: "Fuzz target"
tests_executed: "Number of testcases executed during this time period"
new_crashes: "Number of new unique crashes observed during this time period"
edge_coverage: "Coverage for this fuzz target (number of edges/total)"
cov_report: "Link to coverage report"
corpus_size: "Size of the minimized corpus generated based on code coverage (number of testcases and total size on disk)"
avg_exec_per_sec: "Average number of testcases executed per second"
fuzzing_time_percent: "Percent of expected fuzzing time that is actually spent fuzzing."
regular_crash_percent: "Percent of fuzzing runs that had regular crashes (other than ooms, leaks, timeouts, startup and bad instrumentation crashes)"
oom_percent: "Percent of fuzzing runs that crashed on OOMs (should be 0)"
leak_percent: "Percent of fuzzing runs that crashed on memory leaks (should be 0)"
timeout_percent: "Percent of fuzzing runs that had testcases timeout (should be 0)"
total_fuzzing_time_hrs: "Total time in hours for which the fuzzer(s) ran. Will be lower if fuzzer hits a crash frequently."
logs: "Link to fuzzing logs"
corpus_backup: "Backup copy of the minimized corpus generated based on code coverage"'''

  # avg_exec_per_sec is executions divided by fuzzing seconds, matching its
  # description above ("testcases executed per second").
  # NOTE(review): `number_of_executed_units` is not one of the
  # Centipede-specific stats columns; confirm it exists in the stats table
  # or switch tests_executed to a NumExecs_* column.
  self.stats_columns = """sum(t.number_of_executed_units) as tests_executed,
custom(j.new_crashes) as new_crashes,
_EDGE_COV as edge_coverage,
_COV_REPORT as cov_report,
_CORPUS_SIZE as corpus_size,
avg(t.NumExecs_Avg / t.FuzzTimeSec_Avg) as avg_exec_per_sec,
avg(t.fuzzing_time_percent) as fuzzing_time_percent,
avg(t.crash_count*100) as regular_crash_percent,
avg(t.oom_count*100) as oom_percent,
avg(t.leak_count*100) as leak_percent,
avg(t.timeout_count*100) as timeout_percent,
sum(t.actual_duration/3600.0) as total_fuzzing_time_hrs,
_FUZZER_RUN_LOGS as logs,
_CORPUS_BACKUP as corpus_backup,"""