Skip to content

Commit

Permalink
Add logging for misbehaving distribution metrics (#4429)
Browse files Browse the repository at this point in the history
### Motivation

Some cumulative distribution metrics (build age, retrieval, testcase
age, testcase triage duration) are misbehaving and capping at 1. This PR
intends to aid in debugging that.
  • Loading branch information
vitorguidi authored Nov 22, 2024
1 parent adc50ff commit 58193a9
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 23 deletions.
15 changes: 9 additions & 6 deletions src/clusterfuzz/_internal/bot/tasks/utasks/fuzz_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -1887,12 +1887,15 @@ def run(self):
self.fuzz_task_output.crash_groups.extend(crash_groups)

fuzzing_session_duration = time.time() - start_time
monitoring_metrics.FUZZING_SESSION_DURATION.add(
fuzzing_session_duration, {
'fuzzer': self.fuzzer_name,
'job': self.job_type,
'platform': environment.platform()
})
labels = {
'fuzzer': self.fuzzer_name,
'job': self.job_type,
'platform': environment.platform()
}
# Each adjacent string literal needs its own `f` prefix; without it the
# second fragment is emitted verbatim as the text "{labels}".
logs.info(f'FUZZING_SESSION_DURATION: add {fuzzing_session_duration} '
f'for {labels}.')
monitoring_metrics.FUZZING_SESSION_DURATION.add(fuzzing_session_duration,
labels)

return uworker_msg_pb2.Output(fuzz_task_output=self.fuzz_task_output) # pylint: disable=no-member

Expand Down
18 changes: 11 additions & 7 deletions src/clusterfuzz/_internal/build_management/build_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,14 +300,17 @@ def set_env_var(name, value):


def _emit_job_build_retrieval_metric(start_time, step, build_type):
"""Emits a metric to track the distribution of build retrieval times.

The recorded value is the time elapsed between `start_time` and now,
converted to minutes. `start_time` is expected to be a `time.time()`-style
timestamp in seconds. The sample is labelled with the job (read from the
JOB_NAME environment variable), the platform, `step` and `build_type`.
"""
elapsed_minutes = (time.time() - start_time) / 60
monitoring_metrics.JOB_BUILD_RETRIEVAL_TIME.add(
elapsed_minutes, {
'job': os.getenv('JOB_NAME'),
'platform': environment.platform(),
'step': step,
'build_type': build_type,
})
labels = {
'job': os.getenv('JOB_NAME'),
'platform': environment.platform(),
'step': step,
'build_type': build_type,
}
logs.info(f'JOB_BUILD_RETRIEVAL_TIME: adding {elapsed_minutes} '
f'for labels {labels}.')
monitoring_metrics.JOB_BUILD_RETRIEVAL_TIME.add(elapsed_minutes, labels)


class BaseBuild:
Expand Down Expand Up @@ -1220,6 +1223,7 @@ def _emit_build_age_metric(gcs_path):
'platform': environment.platform(),
'task': os.getenv('TASK_NAME'),
}
logs.info(f'JOB_BUILD_AGE: adding {elapsed_time_in_hours} for {labels}')
monitoring_metrics.JOB_BUILD_AGE.add(elapsed_time_in_hours, labels)
# This field is expected as a datetime object
# https://cloud.google.com/storage/docs/json_api/v1/objects#resource
Expand Down
15 changes: 10 additions & 5 deletions src/clusterfuzz/_internal/common/testcase_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,17 @@ def emit_testcase_triage_duration_metric(testcase_id: int, step: str):
' failed to emit TESTCASE_UPLOAD_TRIAGE_DURATION metric.')
return

labels = {
'job': testcase.job_type,
'step': step,
}

logs.info(
f'TESTCASE_UPLOAD_TRIAGE_DURATION: adding {elapsed_time_since_upload} for {labels}.'
)

monitoring_metrics.TESTCASE_UPLOAD_TRIAGE_DURATION.add(
elapsed_time_since_upload,
labels={
'job': testcase.job_type,
'step': step,
})
elapsed_time_since_upload, labels=labels)


def get_testcase_upload_metadata(
Expand Down
14 changes: 9 additions & 5 deletions src/clusterfuzz/_internal/cron/triage.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,12 +306,16 @@ def _emit_untriaged_testcase_age_metric(critical_tasks_completed: bool,
if not testcase.timestamp:
return

labels = {
'job': testcase.job_type,
'platform': testcase.platform,
}

# Each adjacent string literal needs its own `f` prefix; without it the
# second fragment is emitted verbatim as the text "{labels}".
logs.info(f'UNTRIAGED_TESTCASE_AGE: adding {testcase.get_age_in_seconds()}'
f' for {labels}')

monitoring_metrics.UNTRIAGED_TESTCASE_AGE.add(
testcase.get_age_in_seconds(),
labels={
'job': testcase.job_type,
'platform': testcase.platform,
})
testcase.get_age_in_seconds(), labels=labels)


def main():
Expand Down

0 comments on commit 58193a9

Please sign in to comment.