Skip to content

Commit

Permalink
[centipede] add support for gathering stats for centipede runs
Browse files Browse the repository at this point in the history
This PR adds support for Centipede stats in ClusterFuzz (CF). This will
help us better understand how Centipede fuzzers are performing on
ClusterFuzz.
  • Loading branch information
paulsemel committed Jan 7, 2025
1 parent 13fb6f2 commit 2a9c6fb
Show file tree
Hide file tree
Showing 4 changed files with 575 additions and 7 deletions.
90 changes: 83 additions & 7 deletions src/clusterfuzz/_internal/bot/fuzzers/centipede/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@
"""Centipede engine interface."""

from collections import namedtuple
import csv
import os
import pathlib
import re
import shutil
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

from clusterfuzz._internal.bot.fuzzers import dictionary_manager
from clusterfuzz._internal.bot.fuzzers import engine_common
Expand All @@ -28,6 +33,7 @@
from clusterfuzz._internal.system import environment
from clusterfuzz._internal.system import new_process
from clusterfuzz.fuzz import engine
from clusterfuzz.stacktraces import constants as stacktraces_constants

_CLEAN_EXIT_SECS = 10

Expand Down Expand Up @@ -72,9 +78,68 @@ def _set_sanitizer_options(fuzzer_path):
environment.set_memory_tool_options(sanitizer_options_var, sanitizer_options)


def _parse_centipede_stats(
    stats_file: str) -> Optional[Dict[str, Union[int, float]]]:
  """Parses the Centipede stats file into a label -> value dictionary.

  The first non-empty CSV row is used as the column labels and the last
  non-empty row as the values to report.

  Args:
    stats_file: the path to Centipede stats file.

  Returns:
    A dictionary mapping each label to its value (an int when the cell parses
    as an integer, a float otherwise), or None when the file does not exist or
    contains no data row.

  Raises:
    ValueError: if a non-empty value cell is not numeric.
  """
  if not os.path.exists(stats_file):
    return None
  with open(stats_file, 'r', newline='') as statsfile:
    # csv.reader yields an empty list for a blank line. Drop those so that a
    # trailing newline is never mistaken for the most recent data row.
    rows = [row for row in csv.reader(statsfile) if row]
  # If the binary could not run at all, the file will be empty or with only
  # the column description line.
  if len(rows) <= 1:
    return None
  labels, values = rows[0], rows[-1]
  stats = {}
  # zip() stops at the shorter row, so a truncated last row cannot raise an
  # IndexError.
  for label, value in zip(labels, values):
    # A trailing comma at the end of a row produces an empty final cell,
    # which carries no stat.
    if not label or not value:
      continue
    try:
      stats[label] = int(value)
    except ValueError:
      # Not an integer literal: covers '1.5' as well as forms like '1e5'.
      stats[label] = float(value)
  return stats


def _parse_centipede_logs(log_lines: List[str]) -> Dict[str, int]:
  """Derives crash-related stats from Centipede's output.

  Each line is matched against the timeout, out-of-memory and crash patterns,
  in that order; only the first pattern that matches a line is recorded for
  that line. A matching stat is set to 1 (not incremented).

  Args:
    log_lines: the log lines.

  Returns:
    A dictionary with the keys 'crash_count', 'timeout_count', 'oom_count'
    and 'leak_count'. 'leak_count' is never modified here and stays 0.
  """
  stats = dict.fromkeys(
      ('crash_count', 'timeout_count', 'oom_count', 'leak_count'), 0)
  for entry in log_lines:
    if re.search(stacktraces_constants.CENTIPEDE_TIMEOUT_REGEX, entry):
      stats['timeout_count'] = 1
    elif re.search(stacktraces_constants.OUT_OF_MEMORY_REGEX, entry):
      stats['oom_count'] = 1
    elif re.search(CRASH_REGEX, entry):
      stats['crash_count'] = 1
  return stats


class Engine(engine.Engine):
"""Centipede engine implementation."""

def __init__(self):
  """Initializes the base engine and creates this engine's work directory.

  The directory returned by `self._create_temp_dir('workdir')` is stored on
  `self.workdir`.
  """
  super().__init__()
  workdir = self._create_temp_dir('workdir')
  self.workdir = workdir

@property
def name(self):
  """Returns the identifier of this engine: the string 'centipede'."""
  return 'centipede'
Expand Down Expand Up @@ -126,8 +191,7 @@ def prepare(self, corpus_dir, target_path, build_dir):
# 1. Centipede-readable corpus file;
# 2. Centipede-readable feature file;
# 3. Crash reproducing inputs.
workdir = self._create_temp_dir('workdir')
arguments[constants.WORKDIR_FLAGNAME] = str(workdir)
arguments[constants.WORKDIR_FLAGNAME] = str(self.workdir)

# Directory corpus_dir saves the corpus files required by ClusterFuzz.
arguments[constants.CORPUS_DIR_FLAGNAME] = corpus_dir
Expand Down Expand Up @@ -214,6 +278,7 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
timeout = max_time + _CLEAN_EXIT_SECS
fuzz_result = runner.run_and_wait(
additional_args=options.arguments, timeout=timeout)
log_lines = fuzz_result.output.splitlines()
fuzz_result.output = Engine.trim_logs(fuzz_result.output)

reproducer_path = _get_reproducer_path(fuzz_result.output, reproducers_dir)
Expand All @@ -224,8 +289,20 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
str(reproducer_path), fuzz_result.output, [],
int(fuzz_result.time_executed)))

# Stats report is not available in Centipede yet.
stats = None
stats_filename = f'fuzzing-stats-{os.path.basename(target_path)}.000000.csv'
stats_file = os.path.join(self.workdir, stats_filename)
stats = _parse_centipede_stats(stats_file)
if not stats:
stats = {}
actual_duration = int(
stats.get('FuzzTimeSec_Avg', fuzz_result.time_executed or 0.0))
fuzzing_time_percent = 100 * actual_duration / float(max_time)
stats.update({
'expected_duration': int(max_time),
'actual_duration': actual_duration,
'fuzzing_time_percent': fuzzing_time_percent,
})
stats.update(_parse_centipede_logs(log_lines))
return engine.FuzzResult(fuzz_result.output, fuzz_result.command, crashes,
stats, fuzz_result.time_executed)

Expand Down Expand Up @@ -412,10 +489,9 @@ def minimize_testcase(self, target_path, arguments, input_path, output_path,
TimeoutError: If the testcase minimization exceeds max_time.
"""
runner = _get_runner(target_path)
workdir = self._create_temp_dir('workdir')
args = [
f'--binary={target_path}',
f'--workdir={workdir}',
f'--workdir={self.workdir}',
f'--minimize_crash={input_path}',
f'--num_runs={constants.NUM_RUNS_PER_MINIMIZATION}',
'--seed=1',
Expand All @@ -425,7 +501,7 @@ def minimize_testcase(self, target_path, arguments, input_path, output_path,
logs.warning(
'Testcase minimization timed out.', fuzzer_output=result.output)
raise TimeoutError('Minimization timed out.')
minimum_testcase = self._get_smallest_crasher(workdir)
minimum_testcase = self._get_smallest_crasher(self.workdir)
if minimum_testcase:
shutil.copyfile(minimum_testcase, output_path)
else:
Expand Down
Loading

0 comments on commit 2a9c6fb

Please sign in to comment.