Skip to content

Commit

Permalink
Merge branch 'master' into feature/triage-lifecycle-for-fuzzers
Browse files Browse the repository at this point in the history
  • Loading branch information
vitorguidi authored Dec 16, 2024
2 parents 4b86f25 + d3d1b76 commit 7190553
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 13 deletions.
64 changes: 58 additions & 6 deletions src/clusterfuzz/_internal/bot/tasks/utasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,50 @@ def __init__(self, subtask: _Subtask):
self._subtask = subtask
self._labels = None
self.utask_main_failure = None
self._utask_success_conditions = [
uworker_msg_pb2.ErrorType.NO_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.ANALYZE_NO_CRASH, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.PROGRESSION_BAD_STATE_MIN_MAX, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.REGRESSION_NO_CRASH, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.REGRESSION_LOW_CONFIDENCE_IN_REGRESSION_RANGE, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.MINIMIZE_UNREPRODUCIBLE_CRASH, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.MINIMIZE_CRASH_TOO_FLAKY, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.LIBFUZZER_MINIMIZATION_UNREPRODUCIBLE, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.ANALYZE_CLOSE_INVALID_UPLOADED, # pylint: disable=no-member
]
self._utask_maybe_retry_conditions = [
uworker_msg_pb2.ErrorType.ANALYZE_BUILD_SETUP, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.ANALYZE_NO_REVISIONS_LIST, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.TESTCASE_SETUP, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.MINIMIZE_SETUP, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.FUZZ_DATA_BUNDLE_SETUP_FAILURE, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.FUZZ_NO_FUZZ_TARGET_SELECTED, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.PROGRESSION_NO_CRASH, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.PROGRESSION_TIMEOUT, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.PROGRESSION_BUILD_SETUP_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.REGRESSION_BUILD_SETUP_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.REGRESSION_TIMEOUT_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.SYMBOLIZE_BUILD_SETUP_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.MINIMIZE_DEADLINE_EXCEEDED, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.MINIMIZE_DEADLINE_EXCEEDED_IN_MAIN_FILE_PHASE, # pylint: disable=no-member
]
self._utask_failure_conditions = [
uworker_msg_pb2.ErrorType.ANALYZE_NO_REVISION_INDEX, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.UNHANDLED, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.VARIANT_BUILD_SETUP, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.FUZZ_BUILD_SETUP_FAILURE, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.FUZZ_NO_FUZZER, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.PROGRESSION_REVISION_LIST_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.PROGRESSION_BUILD_NOT_FOUND, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.PROGRESSION_BAD_BUILD, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.REGRESSION_REVISION_LIST_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.REGRESSION_BUILD_NOT_FOUND, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.REGRESSION_BAD_BUILD_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.LIBFUZZER_MINIMIZATION_FAILED, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.CORPUS_PRUNING_FUZZER_SETUP_FAILED, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.CORPUS_PRUNING_ERROR, # pylint: disable=no-member
uworker_msg_pb2.ErrorType.FUZZ_BAD_BUILD, # pylint: disable=no-member
]

if subtask == _Subtask.PREPROCESS:
self._preprocess_start_time_ns = self.start_time_ns
Expand Down Expand Up @@ -125,6 +169,18 @@ def set_task_details(self,
# Ensure we always have a value after this method returns.
assert self._preprocess_start_time_ns is not None

def _infer_uworker_main_outcome(self, exc_type, uworker_error):
  """Maps the result of a uworker run to an outcome label.

  Args:
    exc_type: Exception type received by the caller; any truthy value is
      treated as a failure without looking at |uworker_error|.
    uworker_error: Value checked for membership in the failure and
      maybe-retry condition lists held on this instance.

  Returns:
    'error' when an exception was raised or |uworker_error| is listed in
    self._utask_failure_conditions, 'maybe_retry' when it is listed in
    self._utask_maybe_retry_conditions, and 'success' for everything else
    (including values that appear in neither list).
  """
  # A raised exception always counts as an error.
  if exc_type:
    return 'error'

  if uworker_error in self._utask_failure_conditions:
    return 'error'

  if uworker_error in self._utask_maybe_retry_conditions:
    return 'maybe_retry'

  # Not explicitly classified as failure or retry: reported as success.
  return 'success'

def __exit__(self, _exc_type, _exc_value, _traceback):
# Ignore exception details, let Python continue unwinding the stack.

Expand All @@ -145,7 +201,8 @@ def __exit__(self, _exc_type, _exc_value, _traceback):
# The only case where a task might fail without throwing, is in
# utask_main, by returning an ErrorType proto which indicates
# failure.
outcome = 'error' if _exc_type or self.utask_main_failure else 'success'
outcome = self._infer_uworker_main_outcome(_exc_type,
self.utask_main_failure)
monitoring_metrics.TASK_OUTCOME_COUNT.increment({
**self._labels, 'outcome': outcome
})
Expand All @@ -166,11 +223,6 @@ def __exit__(self, _exc_type, _exc_value, _traceback):
monitoring_metrics.TASK_OUTCOME_COUNT_BY_ERROR_TYPE.increment(
trimmed_labels)

if error_condition != 'UNHANDLED_EXCEPTION':
task = self._labels['task']
subtask = self._labels['subtask']
logs.info(f'Task {task}, at subtask {subtask}, finished successfully.')


def ensure_uworker_env_type_safety(uworker_env):
"""Converts all values in |uworker_env| to str types.
Expand Down
39 changes: 33 additions & 6 deletions src/clusterfuzz/_internal/cron/triage.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,25 @@ def _set_testcase_stuck_state(testcase: data_types.Testcase, state: bool):
testcase.put()


# Tally of untriaged testcases, keyed by (job, status) tuples.
untriaged_testcases = {}


def _increment_untriaged_testcase_count(job, status):
  """Adds one to the tally stored under the (job, status) key.

  A key that has not been seen before starts counting from zero.
  """
  key = (job, status)
  untriaged_testcases[key] = untriaged_testcases.get(key, 0) + 1


def _emit_untriaged_testcase_count_metric():
  """Sets UNTRIAGED_TESTCASE_COUNT once for every tallied (job, status)."""
  for (job, status), count in untriaged_testcases.items():
    metric_labels = {
        'job': job,
        'status': status,
    }
    monitoring_metrics.UNTRIAGED_TESTCASE_COUNT.set(
        count, labels=metric_labels)


def _emit_untriaged_testcase_age_metric(testcase: data_types.Testcase):
"""Emmits a metric to track age of untriaged testcases."""
if not testcase.timestamp:
Expand All @@ -331,6 +350,12 @@ def _emit_untriaged_testcase_age_metric(testcase: data_types.Testcase):
})


# Values passed as the `status` argument of
# _increment_untriaged_testcase_count; each one names the reason a testcase
# is still counted as untriaged.
PENDING_CRITICAL_TASKS = 'pending_critical_tasks'
PENDING_PROGRESSION = 'pending_progression'
PENDING_GROUPING = 'pending_grouping'
PENDING_FILING = 'pending_filing'


def main():
"""Files bugs."""
try:
Expand All @@ -353,8 +378,6 @@ def main():

throttler = Throttler()

untriaged_testcases = 0

for testcase_id in data_handler.get_open_testcase_id_iterator():
logs.info(f'Triaging {testcase_id}')
try:
Expand Down Expand Up @@ -386,7 +409,8 @@ def main():
_set_testcase_stuck_state(testcase, True)
logs.info(f'Skipping testcase {testcase_id}, progression pending')
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
_increment_untriaged_testcase_count(testcase.job_type,
PENDING_PROGRESSION)
continue

# If the testcase has a bug filed already, no triage is needed.
Expand All @@ -410,6 +434,8 @@ def main():
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
_set_testcase_stuck_state(testcase, True)
_increment_untriaged_testcase_count(testcase.job_type,
PENDING_CRITICAL_TASKS)
logs.info(
f'Skipping testcase {testcase_id}, critical tasks still pending.')
continue
Expand All @@ -429,6 +455,7 @@ def main():
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
_set_testcase_stuck_state(testcase, True)
_increment_untriaged_testcase_count(testcase.job_type, PENDING_GROUPING)
logs.info(f'Skipping testcase {testcase_id}, pending grouping.')
continue

Expand All @@ -437,6 +464,7 @@ def main():
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
_set_testcase_stuck_state(testcase, True)
_increment_untriaged_testcase_count(testcase.job_type, PENDING_GROUPING)
logs.info(f'Skipping testcase {testcase_id}, pending grouping.')
continue

Expand Down Expand Up @@ -468,6 +496,7 @@ def main():
_emit_untriaged_testcase_age_metric(testcase)
untriaged_testcases += 1
_set_testcase_stuck_state(testcase, False)
_increment_untriaged_testcase_count(testcase.job_type, PENDING_FILING)

# File the bug first and then create filed bug metadata.
if not _file_issue(testcase, issue_tracker, throttler):
Expand All @@ -480,9 +509,7 @@ def main():
logs.info('Filed new issue %s for testcase %d.' % (testcase.bug_information,
testcase_id))

monitoring_metrics.UNTRIAGED_TESTCASE_COUNT.set(
untriaged_testcases, labels={})

_emit_untriaged_testcase_count_metric()
logs.info('Triage testcases succeeded.')
return True

Expand Down
5 changes: 4 additions & 1 deletion src/clusterfuzz/_internal/metrics/monitoring_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,10 @@
description='Number of testcases that were not yet triaged '
'(have not yet completed analyze, regression,'
' minimization, impact task), in hours.',
field_spec=[],
field_spec=[
monitor.StringField('job'),
monitor.StringField('status'),
],
)

ANALYZE_TASK_REPRODUCIBILITY = monitor.CounterMetric(
Expand Down

0 comments on commit 7190553

Please sign in to comment.