From e126c51a78516190822eeaacede56e093ca36129 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 30 Jan 2024 15:28:34 +0100 Subject: [PATCH] Implements feature based perf instrumentation comparision (#806) Introduces a new setup for measuring feature performance --- requirements.txt | 1 + .../varats/experiment/experiment_util.py | 11 +- varats-core/varats/experiment/steps/patch.py | 2 +- varats-core/varats/project/varats_command.py | 38 + varats/setup.py | 3 +- .../feature_perf_precision_database.py | 932 +++++++++++++ .../experiments/vara/feature_experiment.py | 3 + .../vara/feature_perf_precision.py | 1202 +++++++++++++++++ .../experiments/vara/feature_perf_runner.py | 6 +- varats/varats/jupyterhelper/file.py | 26 + varats/varats/plots/feature_perf_precision.py | 426 ++++++ varats/varats/projects/cpp_projects/hyteg.py | 5 +- .../perf_tests/feature_perf_cs_collection.py | 22 + .../varats/tables/feature_perf_precision.py | 538 ++++++++ varats/varats/tools/bb_config.py | 1 + 15 files changed, 3206 insertions(+), 10 deletions(-) create mode 100644 varats/varats/data/databases/feature_perf_precision_database.py create mode 100644 varats/varats/experiments/vara/feature_perf_precision.py create mode 100644 varats/varats/plots/feature_perf_precision.py create mode 100644 varats/varats/tables/feature_perf_precision.py diff --git a/requirements.txt b/requirements.txt index d1b506d98..7bbc7f53e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ benchbuild>=6.8 click>=8.1.3 +cliffs-delta>=1.0.0 distro>=1.5.0 graphviz>=0.14.2 ijson>=3.1.4 diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index 78e2aee8c..457c2e694 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -518,9 +518,12 @@ def call_with_output_folder(self, tmp_dir: Path) -> StepResult: """Actual call implementation that gets a path to tmp_folder.""" -class 
ZippedExperimentSteps( - MultiStep[tp.Union[OutputFolderStep, ProjectStep]] # type: ignore -): +ZippedStepTy = tp.TypeVar( + "ZippedStepTy", bound=tp.Union[OutputFolderStep, ProjectStep] +) + + +class ZippedExperimentSteps(MultiStep[ZippedStepTy]): # type: ignore """Runs multiple actions, providing them a shared tmp folder that afterwards is zipped into an archive.""" @@ -529,7 +532,7 @@ class ZippedExperimentSteps( def __init__( self, output_filepath: ReportFilepath, - actions: tp.Optional[tp.List[tp.Union[OutputFolderStep, ProjectStep]]] + actions: tp.Optional[tp.List[ZippedStepTy]] ) -> None: super().__init__(actions) self.__output_filepath = output_filepath diff --git a/varats-core/varats/experiment/steps/patch.py b/varats-core/varats/experiment/steps/patch.py index e03fd63a3..bbaf96aff 100644 --- a/varats-core/varats/experiment/steps/patch.py +++ b/varats-core/varats/experiment/steps/patch.py @@ -48,7 +48,7 @@ class RevertPatch(actions.ProjectStep): NAME = "REVERT_PATCH" DESCRIPTION = "Revert a Git patch from a project." 
- def __init__(self, project, patch): + def __init__(self, project: VProject, patch: Patch) -> None: super().__init__(project) self.__patch = patch diff --git a/varats-core/varats/project/varats_command.py b/varats-core/varats/project/varats_command.py index 3a78a5630..f0e938b5b 100644 --- a/varats-core/varats/project/varats_command.py +++ b/varats-core/varats/project/varats_command.py @@ -1,7 +1,12 @@ """Custom version of benchbuild's Command for use with the VaRA-Tool-Suite.""" import typing as tp +from pathlib import Path from benchbuild.command import Command, ProjectCommand, PathToken +from benchbuild.utils.cmd import time +from plumbum import local +from plumbum.commands.base import BaseCommand +from plumbum.machines import LocalCommand from varats.utils.config import get_config_patches @@ -75,6 +80,39 @@ def as_plumbum(self, **kwargs: tp.Any) -> 'BoundEnvCommand': return cmd + def as_plumbum_wrapped_with( + self, + wrapper_cmd: tp.Optional['BoundEnvCommand'] = None, + adapted_binary_location: tp.Optional[Path] = None, + **kwargs: tp.Any + ) -> 'BaseCommand': + base_cmd = super().as_plumbum(**kwargs) + + # TODO: maybe we should just provide a callable to modify the original + # command + if adapted_binary_location: + if isinstance(base_cmd, LocalCommand): + base_cmd.executable = base_cmd.executable.copy( + adapted_binary_location, override=True + ) + else: + base_cmd.cmd.executable = base_cmd.cmd.executable.copy( + adapted_binary_location, override=True + ) + + if wrapper_cmd: + cmd = wrapper_cmd[base_cmd] + else: + cmd = base_cmd + + if self._redirect_stdin: + cmd = cmd < str(self._redirect_stdin.render(**kwargs)) + + if self._redirect_stdout: + cmd = cmd > str(self._redirect_stdout.render(**kwargs)) + + return cmd + class VProjectCommand(ProjectCommand): # type: ignore diff --git a/varats/setup.py b/varats/setup.py index 1986b251d..1b4879185 100644 --- a/varats/setup.py +++ b/varats/setup.py @@ -30,7 +30,7 @@ "pandas>=1.5.3", "plotly>=5.13.1", 
"plumbum>=1.6", - "pygit2>=1.10", + "pygit2>=1.10,<1.14.0", "PyGithub>=1.47", "pygraphviz>=1.7", "pygtrie>=2.3", @@ -44,6 +44,7 @@ "tabulate>=0.9", "varats-core>=13.0.5", "wllvm>=1.3.1", + "cliffs-delta>=1.0.0", ], author="Florian Sattler", author_email="sattlerf@cs.uni-saarland.de", diff --git a/varats/varats/data/databases/feature_perf_precision_database.py b/varats/varats/data/databases/feature_perf_precision_database.py new file mode 100644 index 000000000..8bdb1e1d6 --- /dev/null +++ b/varats/varats/data/databases/feature_perf_precision_database.py @@ -0,0 +1,932 @@ +"""Shared data aggregation function for analyzing feature performance.""" +import abc +import logging +import traceback +import typing as tp +from collections import defaultdict + +import numpy as np +import pandas as pd +from cliffs_delta import cliffs_delta # type: ignore +from scipy.stats import ttest_ind + +import varats.experiments.vara.feature_perf_precision as fpp +from varats.data.metrics import ConfusionMatrix +from varats.data.reports.performance_influence_trace_report import ( + PerfInfluenceTraceReport, + PerfInfluenceTraceReportAggregate, +) +from varats.experiments.vara.feature_experiment import FeatureExperiment +from varats.jupyterhelper.file import load_mpr_time_report_aggregate +from varats.paper.case_study import CaseStudy +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.report.gnu_time_report import TimeReportAggregate +from varats.report.multi_patch_report import MultiPatchReport +from varats.report.report import BaseReport, ReportFilepath +from varats.report.tef_report import ( + TEFReport, + TraceEvent, + TraceEventType, + TEFReportAggregate, +) +from varats.revision.revisions import get_processed_revisions_files +from varats.utils.git_util import FullCommitHash + +LOG = logging.getLogger(__name__) + + +def get_interactions_from_fr_string(interactions: str, sep: str = ",") -> str: + """Convert the feature strings in a TEFReport from 
FR(x,y) to x*y, similar + to the format used by SPLConqueror.""" + interactions = ( + interactions.replace("FR", "").replace("(", "").replace(")", "") + ) + interactions_list = interactions.split(sep) + + # Features cannot interact with itself, so remove duplicates + interactions_list = list(set(interactions_list)) + + # Ignore interactions with base, but do not remove base if it's the only + # feature + if "Base" in interactions_list and len(interactions_list) > 1: + interactions_list.remove("Base") + + interactions_str = "*".join(interactions_list) + + return interactions_str + + +def get_feature_performance_from_tef_report( + tef_report: TEFReport, +) -> tp.Dict[str, int]: + """Extract feature performance from a TEFReport.""" + open_events: tp.List[TraceEvent] = [] + + feature_performances: tp.Dict[str, int] = {} + + def get_matching_event( + open_events: tp.List[TraceEvent], closing_event: TraceEvent + ) -> tp.Optional[TraceEvent]: + for event in open_events: + if ( + event.uuid == closing_event.uuid and + event.pid == closing_event.pid and + event.tid == closing_event.tid + ): + open_events.remove(event) + return event + + LOG.debug( + f"Could not find matching start for Event {repr(closing_event)}." + ) + + return None + + found_missing_open_event = False + for trace_event in tef_report.trace_events: + if trace_event.category == "Feature": + if trace_event.event_type == TraceEventType.DURATION_EVENT_BEGIN: + # insert event at the top of the list + open_events.insert(0, trace_event) + elif trace_event.event_type == TraceEventType.DURATION_EVENT_END: + opening_event = get_matching_event(open_events, trace_event) + if not opening_event: + found_missing_open_event = True + continue + + end_timestamp = trace_event.timestamp + begin_timestamp = opening_event.timestamp + + # Subtract feature duration from parent duration such that + # it is not counted twice, similar to behavior in + # Performance-Influence models. 
+ interactions = [event.name for event in open_events] + if open_events: + # Parent is equivalent to interaction of all open + # events. + interaction_string = get_interactions_from_fr_string( + ",".join(interactions) + ) + if interaction_string in feature_performances: + feature_performances[interaction_string] -= ( + end_timestamp - begin_timestamp + ) + else: + feature_performances[interaction_string] = -( + end_timestamp - begin_timestamp + ) + + interaction_string = get_interactions_from_fr_string( + ",".join(interactions + [trace_event.name]) + ) + + current_performance = feature_performances.get( + interaction_string, 0 + ) + feature_performances[interaction_string] = ( + current_performance + end_timestamp - begin_timestamp + ) + + if open_events: + LOG.error("Not all events have been correctly closed.") + LOG.debug(f"Events = {open_events}.") + + if found_missing_open_event: + LOG.error("Not all events have been correctly opened.") + + return feature_performances + + +class Profiler(): + """Profiler interface to add different profilers to the evaluation.""" + + def __init__( + self, name: str, experiment: tp.Type[FeatureExperiment], + overhead_experiment: tp.Type[FeatureExperiment], + report_type: tp.Type[BaseReport] + ) -> None: + self.__name = name + self.__experiment = experiment + self.__overhead_experiment = overhead_experiment + self.__report_type = report_type + + @property + def name(self) -> str: + """Name of the profiler used.""" + return self.__name + + @property + def experiment(self) -> tp.Type[FeatureExperiment]: + """Experiment used to produce this profilers information.""" + return self.__experiment + + @property + def overhead_experiment(self) -> tp.Type[FeatureExperiment]: + """Experiment used to produce overhead data that this profilers produced + when collecting information.""" + return self.__overhead_experiment + + @property + def report_type(self) -> tp.Type[BaseReport]: + """Report type used to load this profilers information.""" + 
return self.__report_type + + @property + def relative_cut_off(self) -> float: + """Returns the relative cut off in percent below which regressions + should not be considered.""" + return 0.01 + + @property + def absolute_cut_off(self) -> int: + """Returns the absolute cut off in milliseconds below which regressions + should not be considered.""" + return 100 + + def _is_significantly_different( + self, old_values: tp.Sequence[tp.Union[float, int]], + new_values: tp.Sequence[tp.Union[float, int]] + ) -> bool: + """Checks if there is a significant difference between old and new + values.""" + return self.__ttest(old_values, new_values) + + def __ttest( # pylint: disable=W0238 + self, old_values: tp.Sequence[tp.Union[float, int]], + new_values: tp.Sequence[tp.Union[float, int]] + ) -> bool: + """Implements t-test.""" + ttest_res = ttest_ind(old_values, new_values) + + if ttest_res.pvalue < 0.05: + return True + + return False + + def __cliffs_delta( # pylint: disable=W0238 + self, old_values: tp.Sequence[tp.Union[float, int]], + new_values: tp.Sequence[tp.Union[float, int]] + ) -> bool: + """Implements cliffs_delta test.""" + cdelta_val, _ = cliffs_delta(old_values, new_values) + + # if res == "large": + if abs(cdelta_val) > 0.7: + return True + + return False + + def _is_feature_relevant( + self, old_measurements: tp.List[int], new_measurements: tp.List[int] + ) -> bool: + """Check if a feature can be ignored for regression checking as it's + time measurements seem not relevant.""" + old_mean = np.mean(old_measurements) + new_mean = np.mean(new_measurements) + + if old_mean < self.absolute_cut_off and \ + new_mean < self.absolute_cut_off: + return False + + old_rel_cut_off = old_mean * self.relative_cut_off + abs_mean_diff = abs(old_mean - new_mean) + if abs_mean_diff < old_rel_cut_off: + return False + + return True + + def _precise_pim_regression_check( + self, baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] + ) -> 
bool: + """Compute if there was a regression in one of the feature terms of the + model between the current and the baseline, using a Mann-Whitney U + test.""" + is_regression = False + + for feature, old_values in baseline_pim.items(): + if feature in current_pim: + if feature == "Base": + # The regression should be identified in actual feature code + continue + + new_values = current_pim[feature] + + # Skip features that seem not to be relevant + # for regressions testing + if not self._is_feature_relevant(old_values, new_values): + continue + + is_regression = is_regression or \ + self._is_significantly_different( + old_values, new_values + ) + else: + if np.mean(old_values) > self.absolute_cut_off: + print( + f"Could not find feature {feature} in new trace. " + f"({np.mean(old_values)}us lost)" + ) + + return is_regression + + def _sum_pim_regression_check( + self, baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] + ) -> bool: + """ + Compute if there was a regression in the sum of the features in the + model between the current and the baseline. + + The comparision is done through a Mann-Whitney U test. + """ + baseline_pim_totals: tp.List[tp.List[int]] = [ + old_values for feature, old_values in baseline_pim.items() + if feature != "Base" + ] + current_pim_totals: tp.List[tp.List[int]] = [ + current_values for feature, current_values in current_pim.items() + if feature != "Base" + ] + + baseline_pim_total: tp.List[int] = [ + sum(values) for values in zip(*baseline_pim_totals) + ] + current_pim_total: tp.List[int] = [ + sum(values) for values in zip(*current_pim_totals) + ] + + if not baseline_pim_total and not current_pim_total: + # How did we get here? 
+ return False + + mean_baseline = np.mean(baseline_pim_total) + mean_diff = abs(mean_baseline - np.mean(current_pim_total)) + if mean_diff < self.absolute_cut_off or \ + mean_diff < mean_baseline * self.relative_cut_off: + return False + + return self._is_significantly_different( + baseline_pim_total, current_pim_total + ) + + def pim_regression_check( + self, baseline_pim: tp.DefaultDict[str, tp.List[int]], + current_pim: tp.DefaultDict[str, tp.List[int]] + ) -> bool: + """Compares two pims and determines if there was a regression between + the baseline and current.""" + return self._precise_pim_regression_check(baseline_pim, current_pim) + + def default_regression_check( + self, old_values: tp.Sequence[tp.Union[float, int]], + new_values: tp.Sequence[tp.Union[float, int]] + ) -> bool: + """Checks if there is a significant difference between old and new + values.""" + return self._is_significantly_different(old_values, new_values) + + @abc.abstractmethod + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + """Checks if there was a regression between the old an new data.""" + + +class VXray(Profiler): + """Profiler mapper implementation for the vara tef tracer.""" + + def __init__(self) -> None: + super().__init__( + "WXray", fpp.TEFProfileRunner, fpp.TEFProfileOverheadRunner, + fpp.MPRTEFAggregate + ) + + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + """Checks if there was a regression between the old an new data.""" + multi_report = MultiPatchReport( + report_path.full_path(), TEFReportAggregate + ) + + old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_tef_report in multi_report.get_baseline_report().reports(): + pim = get_feature_performance_from_tef_report(old_tef_report) + for feature, value in pim.items(): + old_acc_pim[feature].append(value) + + new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + opt_mr = 
multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() + + for new_tef_report in opt_mr.reports(): + pim = get_feature_performance_from_tef_report(new_tef_report) + for feature, value in pim.items(): + new_acc_pim[feature].append(value) + + return self.pim_regression_check(old_acc_pim, new_acc_pim) + + +class PIMTracer(Profiler): + """Profiler mapper implementation for the vara performance-influence-model + tracer.""" + + def __init__(self) -> None: + super().__init__( + "PIMTracer", fpp.PIMProfileRunner, fpp.PIMProfileOverheadRunner, + fpp.MPRPIMAggregate + ) + + @staticmethod + def __aggregate_pim_data( + reports: tp.List[PerfInfluenceTraceReport] + ) -> tp.DefaultDict[str, tp.List[int]]: + acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_pim_report in reports: + per_report_acc_pim: tp.DefaultDict[str, int] = defaultdict(int) + for region_inter in old_pim_report.region_interaction_entries: + name = get_interactions_from_fr_string( + old_pim_report._translate_interaction( + region_inter.interaction + ), + sep="*" + ) + per_report_acc_pim[name] += region_inter.time + + for name, time_value in per_report_acc_pim.items(): + acc_pim[name].append(time_value) + + return acc_pim + + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + """Checks if there was a regression between the old an new data.""" + multi_report = MultiPatchReport( + report_path.full_path(), PerfInfluenceTraceReportAggregate + ) + + old_acc_pim = self.__aggregate_pim_data( + multi_report.get_baseline_report().reports() + ) + + opt_mr = multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() + + new_acc_pim = self.__aggregate_pim_data(opt_mr.reports()) + + return self.pim_regression_check(old_acc_pim, new_acc_pim) + + +class EbpfTraceTEF(Profiler): + """Profiler mapper implementation for the vara tef tracer.""" + + def __init__(self) -> None: + super().__init__( + 
"eBPFTrace", fpp.EbpfTraceTEFProfileRunner, + fpp.TEFProfileOverheadRunner, fpp.MPRTEFAggregate + ) + + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + """Checks if there was a regression between the old an new data.""" + multi_report = MultiPatchReport( + report_path.full_path(), TEFReportAggregate + ) + + old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + for old_tef_report in multi_report.get_baseline_report().reports(): + pim = get_feature_performance_from_tef_report(old_tef_report) + for feature, value in pim.items(): + old_acc_pim[feature].append(value) + + new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list) + opt_mr = multi_report.get_report_for_patch(patch_name) + if not opt_mr: + raise NotImplementedError() + + for new_tef_report in opt_mr.reports(): + pim = get_feature_performance_from_tef_report(new_tef_report) + for feature, value in pim.items(): + new_acc_pim[feature].append(value) + + return self.pim_regression_check(old_acc_pim, new_acc_pim) + + +class Baseline(Profiler): + """Profiler mapper implementation for the black-box baseline.""" + + def __init__(self) -> None: + super().__init__( + "Base", fpp.BlackBoxBaselineRunner, fpp.BlackBoxOverheadBaseline, + TimeReportAggregate + ) + + def is_regression( + self, report_path: ReportFilepath, patch_name: str + ) -> bool: + time_reports = load_mpr_time_report_aggregate(report_path) + + old_time = time_reports.get_baseline_report() + new_time = time_reports.get_report_for_patch(patch_name) + if not new_time: + raise LookupError(f"Missing new time report in file {report_path}") + + # Cut off regressions smaller than 100ms + req_diff = self.absolute_cut_off / 1000 + if np.mean(old_time.measurements_wall_clock_time + ) == np.mean(new_time.measurements_wall_clock_time): + return False + + if abs( + np.mean(old_time.measurements_wall_clock_time) - + np.mean(new_time.measurements_wall_clock_time) + ) < req_diff: + return False + + return 
self.default_regression_check( + old_time.measurements_wall_clock_time, + new_time.measurements_wall_clock_time + ) + + +def get_patch_names(case_study: CaseStudy) -> tp.List[str]: + """Looks up all patch names from the given case study.""" + report_files = get_processed_revisions_files( + case_study.project_name, + fpp.BlackBoxBaselineRunner, + fpp.MPRTimeReportAggregate, + get_case_study_file_name_filter(case_study), + config_id=0 + ) + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find profiling data for {case_study.project_name}" + ". config_id=0, profiler=Baseline" + ) + return [] + + time_reports = load_mpr_time_report_aggregate(report_files[0].full_path()) + + return time_reports.get_patch_names() + + +def get_regressing_config_ids_gt( + project_name: str, case_study: CaseStudy, rev: FullCommitHash, + patch_name: str +) -> tp.Optional[tp.Dict[int, bool]]: + """Computes the baseline data, i.e., the config ids where a regression was + identified.""" + + ground_truth: tp.Dict[int, bool] = {} + + for config_id in case_study.get_config_ids_for_revision(rev): + report_files = get_processed_revisions_files( + project_name, + fpp.BlackBoxBaselineRunner, + fpp.MPRTimeReportAggregate, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find profiling data for {case_study.project_name}." 
+ f" {config_id=}, profiler=Baseline" + ) + return None + + baseline_prof = Baseline() + ground_truth[config_id] = baseline_prof.is_regression( + report_files[0], patch_name + ) + + return ground_truth + + +def map_to_positive_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: + return [config_id for config_id, value in reg_dict.items() if value is True] + + +def map_to_negative_config_ids(reg_dict: tp.Dict[int, bool]) -> tp.List[int]: + return [ + config_id for config_id, value in reg_dict.items() if value is False + ] + + +def compute_profiler_predictions( + profiler: Profiler, project_name: str, case_study: CaseStudy, + config_ids: tp.List[int], patch_name: str +) -> tp.Optional[tp.Dict[int, bool]]: + """Computes the regression predictions for a given profiler.""" + + result_dict: tp.Dict[int, bool] = {} + for config_id in config_ids: + print( + f"Compute profiler predictions: profiler={profiler.name} - " + f"{project_name=} - {patch_name} - {config_id=}" + ) + report_files = get_processed_revisions_files( + project_name, + profiler.experiment, + profiler.report_type, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find profiling data for {project_name=}" + f". 
{config_id=}, profiler={profiler.name}" + ) + return None + + try: + result_dict[config_id] = profiler.is_regression( + report_files[0], patch_name + ) + except Exception as exception: # pylint: disable=W0718 + # Print exception information but continue working on the plot/table + print( + f"FAILURE: Skipping {config_id=} of {project_name=}, " + f"profiler={profiler.name}" + ) + print(exception) + print(traceback.format_exc()) + + return result_dict + + +class OverheadData: + """Data class to store the collected overhead data and provide high-level + operations on it.""" + + def __init__( + self, mean_time: tp.Dict[int, float], mean_memory: tp.Dict[int, float], + major_page_faults: tp.Dict[int, + float], minor_page_faults: tp.Dict[int, + float], + fs_inputs: tp.Dict[int, float], fs_outputs: tp.Dict[int, float] + ) -> None: + self._mean_time: tp.Dict[int, float] = mean_time + self._mean_memory: tp.Dict[int, float] = mean_memory + self._mean_major_page_faults: tp.Dict[int, float] = major_page_faults + self._mean_minor_page_faults: tp.Dict[int, float] = minor_page_faults + self._mean_fs_inputs: tp.Dict[int, float] = fs_inputs + self._mean_fs_outputs: tp.Dict[int, float] = fs_outputs + + def mean_time(self) -> float: + return float(np.mean(list(self._mean_time.values()))) + + def mean_memory(self) -> float: + return float(np.mean(list(self._mean_memory.values()))) + + def mean_major_page_faults(self) -> float: + return float(np.mean(list(self._mean_major_page_faults.values()))) + + def mean_minor_page_faults(self) -> float: + return float(np.mean(list(self._mean_minor_page_faults.values()))) + + def mean_fs_inputs(self) -> float: + return float(np.mean(list(self._mean_fs_inputs.values()))) + + def mean_fs_outputs(self) -> float: + return float(np.mean(list(self._mean_fs_outputs.values()))) + + # TODO: remove after 'Type' notation is removed + # pylint: disable=protected-access + def config_wise_time_diff(self, + other: 'OverheadData') -> tp.Dict[int, float]: + return 
self.__config_wise(self._mean_time, other._mean_time) + + def config_wise_memory_diff(self, + other: 'OverheadData') -> tp.Dict[int, float]: + return self.__config_wise(self._mean_memory, other._mean_memory) + + def config_wise_major_page_faults_diff( + self, other: 'OverheadData' + ) -> tp.Dict[int, float]: + return self.__config_wise( + self._mean_major_page_faults, other._mean_major_page_faults + ) + + def config_wise_minor_page_faults_diff( + self, other: 'OverheadData' + ) -> tp.Dict[int, float]: + return self.__config_wise( + self._mean_minor_page_faults, other._mean_minor_page_faults + ) + + def config_wise_fs_inputs_diff( + self, other: 'OverheadData' + ) -> tp.Dict[int, float]: + return self.__config_wise(self._mean_fs_inputs, other._mean_fs_inputs) + + def config_wise_fs_outputs_diff( + self, other: 'OverheadData' + ) -> tp.Dict[int, float]: + return self.__config_wise(self._mean_fs_outputs, other._mean_fs_outputs) + + # pylint: enable=protected-access + + @staticmethod + def __config_wise( + self_map: tp.Dict[int, float], other_map: tp.Dict[int, float] + ) -> tp.Dict[int, float]: + gen_diff: tp.Dict[int, float] = {} + for config_id, gen_value in self_map.items(): + if config_id not in other_map: + raise AssertionError("Could not find config id in other") + + gen_diff[config_id] = gen_value - other_map[config_id] + + return gen_diff + + @staticmethod + def compute_overhead_data( + profiler: Profiler, case_study: CaseStudy, rev: FullCommitHash + ) -> tp.Optional['OverheadData']: + """Computes overhead data for a given case study.""" + + mean_time: tp.Dict[int, float] = {} + mean_memory: tp.Dict[int, float] = {} + mean_major_page_faults: tp.Dict[int, float] = {} + mean_minor_page_faults: tp.Dict[int, float] = {} + mean_fs_inputs: tp.Dict[int, float] = {} + mean_fs_outputs: tp.Dict[int, float] = {} + + for config_id in case_study.get_config_ids_for_revision(rev): + report_files = get_processed_revisions_files( + case_study.project_name, + 
profiler.overhead_experiment, + TimeReportAggregate, + get_case_study_file_name_filter(case_study), + config_id=config_id + ) + + if len(report_files) > 1: + raise AssertionError("Should only be one") + if not report_files: + print( + f"Could not find overhead data. {config_id=}, " + f"profiler={profiler.name}" + ) + return None + + time_report = TimeReportAggregate(report_files[0].full_path()) + mean_time[config_id] = float( + np.mean(time_report.measurements_wall_clock_time) + ) + mean_memory[config_id] = float( + np.mean(time_report.max_resident_sizes) + ) + mean_major_page_faults[config_id] = float( + np.mean(time_report.major_page_faults) + ) + mean_minor_page_faults[config_id] = float( + np.mean(time_report.minor_page_faults) + ) + mean_fs_inputs[config_id] = float( + np.mean([io[0] for io in time_report.filesystem_io]) + ) + mean_fs_outputs[config_id] = float( + np.mean([io[1] for io in time_report.filesystem_io]) + ) + if not mean_time: + print( + f"Case study for project {case_study.project_name} had " + "no configs, skipping..." 
+ ) + return None + + return OverheadData( + mean_time, mean_memory, mean_major_page_faults, + mean_minor_page_faults, mean_fs_inputs, mean_fs_outputs + ) + + +def load_precision_data( + case_studies: tp.List[CaseStudy], profilers: tp.List[Profiler] +) -> pd.DataFrame: + """Loads precision measurement data for the given cases studies and computes + precision and recall for the different profilers.""" + table_rows_plot = [] + for case_study in case_studies: + for patch_name in get_patch_names(case_study): + rev = case_study.revisions[0] + project_name = case_study.project_name + + ground_truth = get_regressing_config_ids_gt( + project_name, case_study, rev, patch_name + ) + + for profiler in profilers: + new_row = { + 'CaseStudy': + project_name, + 'Patch': + patch_name, + 'Configs': + len(case_study.get_config_ids_for_revision(rev)), + 'RegressedConfigs': + len(map_to_positive_config_ids(ground_truth)) + if ground_truth else -1 + } + + predicted = compute_profiler_predictions( + profiler, project_name, case_study, + case_study.get_config_ids_for_revision(rev), patch_name + ) + + if ground_truth and predicted: + results = ConfusionMatrix( + map_to_positive_config_ids(ground_truth), + map_to_negative_config_ids(ground_truth), + map_to_positive_config_ids(predicted), + map_to_negative_config_ids(predicted) + ) + + new_row['precision'] = results.precision() + new_row['recall'] = results.recall() + new_row['f1_score'] = results.f1_score() + new_row['Profiler'] = profiler.name + new_row['fp_ids'] = results.getFPs() + new_row['fn_ids'] = results.getFNs() + else: + new_row['precision'] = np.nan + new_row['recall'] = np.nan + new_row['f1_score'] = np.nan + new_row['Profiler'] = profiler.name + new_row['fp_ids'] = [] + new_row['fn_ids'] = [] + + table_rows_plot.append(new_row) + + return pd.DataFrame(table_rows_plot) + + +def load_overhead_data( + case_studies: tp.List[CaseStudy], profilers: tp.List[Profiler] +) -> pd.DataFrame: + """Loads overhead measurement data for the 
given cases studies and computes + overhead metrics that where introduced by the different profilers.""" + table_rows = [] + + for case_study in case_studies: + rev = case_study.revisions[0] + project_name = case_study.project_name + + overhead_ground_truth = OverheadData.compute_overhead_data( + Baseline(), case_study, rev + ) + if not overhead_ground_truth: + print(f"No baseline data for {case_study.project_name}, skipping") + continue + + new_row = { + 'CaseStudy': project_name, + 'Profiler': "Base", + 'time': overhead_ground_truth.mean_time(), + 'memory': overhead_ground_truth.mean_memory(), + 'major_page_faults': overhead_ground_truth.mean_major_page_faults(), + 'minor_page_faults': overhead_ground_truth.mean_minor_page_faults(), + 'fs_inputs': overhead_ground_truth.mean_fs_inputs(), + 'fs_outputs': overhead_ground_truth.mean_fs_outputs(), + 'overhead_time': 0, + 'overhead_memory': 0, + 'overhead_major_page_faults': 0, + 'overhead_minor_page_faults': 0, + 'overhead_fs_inputs': 0, + 'overhead_fs_outputs': 0 + } + + table_rows.append(new_row) + + for profiler in profilers: + profiler_overhead = OverheadData.compute_overhead_data( + profiler, case_study, rev + ) + + new_row = {'CaseStudy': project_name, 'Profiler': profiler.name} + + if profiler_overhead: + time_diff = profiler_overhead.config_wise_time_diff( + overhead_ground_truth + ) + memory_diff = profiler_overhead.config_wise_memory_diff( + overhead_ground_truth + ) + major_page_faults_diff = \ + profiler_overhead.config_wise_major_page_faults_diff( + overhead_ground_truth + ) + minor_page_faults_diff = \ + profiler_overhead.config_wise_minor_page_faults_diff( + overhead_ground_truth + ) + fs_inputs_diff = profiler_overhead.config_wise_fs_inputs_diff( + overhead_ground_truth + ) + fs_outputs_diff = profiler_overhead.config_wise_fs_outputs_diff( + overhead_ground_truth + ) + + new_row['time'] = profiler_overhead.mean_time() + new_row['overhead_time'] = np.mean(list(time_diff.values())) + + new_row['memory'] 
= profiler_overhead.mean_memory() + new_row['overhead_memory'] = np.mean(list(memory_diff.values())) + + new_row['major_page_faults' + ] = profiler_overhead.mean_major_page_faults() + new_row['overhead_major_page_faults'] = np.mean( + list(major_page_faults_diff.values()) + ) + + new_row['minor_page_faults' + ] = profiler_overhead.mean_minor_page_faults() + new_row['overhead_minor_page_faults'] = np.mean( + list(minor_page_faults_diff.values()) + ) + + new_row['fs_inputs'] = profiler_overhead.mean_fs_inputs() + new_row['overhead_fs_inputs'] = np.mean( + list(fs_inputs_diff.values()) + ) + + new_row['fs_outputs'] = profiler_overhead.mean_fs_outputs() + new_row['overhead_fs_outputs'] = np.mean( + list(fs_outputs_diff.values()) + ) + else: + new_row['time'] = np.nan + new_row['overhead_time'] = np.nan + + new_row['memory'] = np.nan + new_row['overhead_memory'] = np.nan + + new_row['major_page_faults'] = np.nan + new_row['overhead_major_page_faults'] = np.nan + + new_row['minor_page_faults'] = np.nan + new_row['overhead_minor_page_faults'] = np.nan + + new_row['fs_inputs'] = np.nan + new_row['overhead_fs_inputs'] = np.nan + + new_row['fs_outputs'] = np.nan + new_row['overhead_fs_outputs'] = np.nan + + table_rows.append(new_row) + + return pd.DataFrame(table_rows) diff --git a/varats/varats/experiments/vara/feature_experiment.py b/varats/varats/experiments/vara/feature_experiment.py index a4cc8722d..3eacff35e 100644 --- a/varats/varats/experiments/vara/feature_experiment.py +++ b/varats/varats/experiments/vara/feature_experiment.py @@ -342,6 +342,9 @@ def run_traced_code(self) -> StepResult: flush=True ) with cleanup(prj_command): + pb_cmd = pb_cmd[get_extra_config_options( + self.project + )] _, _, err = pb_cmd.run() xray = re.findall( r"XRay: Log file in '(.+?)'", diff --git a/varats/varats/experiments/vara/feature_perf_precision.py b/varats/varats/experiments/vara/feature_perf_precision.py new file mode 100644 index 000000000..2c58fcee4 --- /dev/null +++ 
b/varats/varats/experiments/vara/feature_perf_precision.py @@ -0,0 +1,1202 @@ +"""Module for feature performance precision experiments that evaluate +measurement support of vara.""" +import tempfile +import textwrap +import typing as tp +from abc import abstractmethod +from pathlib import Path +from time import sleep + +import benchbuild.extensions as bb_ext +from benchbuild.command import cleanup, ProjectCommand +from benchbuild.environments.domain.declarative import ContainerImage +from benchbuild.utils import actions +from benchbuild.utils.actions import StepResult +from benchbuild.utils.cmd import time, cp, sudo, bpftrace +from plumbum import local, BG +from plumbum.commands.modifiers import Future + +from varats.data.reports.performance_influence_trace_report import ( + PerfInfluenceTraceReportAggregate, +) +from varats.experiment.experiment_util import ( + WithUnlimitedStackSize, + ZippedReportFolder, + create_new_success_result_filepath, + get_default_compile_error_wrapped, + ZippedExperimentSteps, + OutputFolderStep, + get_config_patch_steps, +) +from varats.experiment.steps.patch import ApplyPatch, RevertPatch +from varats.experiment.steps.recompile import ReCompile +from varats.experiment.workload_util import WorkloadCategory, workload_commands +from varats.experiments.vara.feature_experiment import ( + FeatureExperiment, + FeatureInstrType, +) +from varats.project.project_domain import ProjectDomains +from varats.project.project_util import BinaryType, ProjectBinaryWrapper +from varats.project.varats_project import VProject +from varats.provider.patch.patch_provider import PatchProvider +from varats.report.gnu_time_report import TimeReportAggregate +from varats.report.multi_patch_report import MultiPatchReport +from varats.report.report import ReportSpecification +from varats.report.tef_report import TEFReportAggregate +from varats.tools.research_tools.vara import VaRA +from varats.utils.config import get_current_config_id +from varats.utils.git_util 
REPS = 3

IDENTIFIER_PATCH_TAG = 'perf_prec'


def perf_prec_workload_commands(
    project: VProject, binary: ProjectBinaryWrapper
) -> tp.List[ProjectCommand]:
    """
    Collect the workload commands that the perf-precision steps run.

    SMALL and MEDIUM workloads are always selected. EXAMPLE workloads are
    placed in front of them, except for the synthetic ``SynthIP*`` and
    ``SynthSAFieldSensitivity`` case studies, whose example commands are too
    "fast".

    Args:
        project: project the workloads belong to
        binary: binary the workloads are run with

    Returns:
        the selected commands in the order EXAMPLE, SMALL, MEDIUM
    """
    has_too_fast_examples = (
        project.name.startswith("SynthIP") or
        project.name == "SynthSAFieldSensitivity"
    )

    categories = [WorkloadCategory.SMALL, WorkloadCategory.MEDIUM]
    if not has_too_fast_examples:
        categories.insert(0, WorkloadCategory.EXAMPLE)

    selected_commands = []
    for category in categories:
        # One lookup per category keeps the categories grouped in the result.
        selected_commands += workload_commands(project, binary, [category])

    return selected_commands


def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]:
    """
    Pick the binaries of ``project`` that should be analyzed.

    Args:
        project: project to select binaries from

    Returns:
        for ``DunePerfRegression`` every binary named ``poisson_yasp_q2_3d``,
        for all other projects a list with only the first binary
    """
    if project.name != "DunePerfRegression":
        return [project.binaries[0]]

    return list(
        filter(
            lambda candidate: candidate.name == "poisson_yasp_q2_3d",
            project.binaries
        )
    )


def get_extra_cflags(project: VProject) -> tp.List[str]:
    """
    Return additional compiler flags that only some projects need.

    Args:
        project: project that gets compiled

    Returns:
        ``["-fvara-disable-phasar"]`` for ``DunePerfRegression`` and
        ``HyTeg``, otherwise an empty list
    """
    # The phasar analysis cannot handle the size of these projects.
    phasar_disabled = project.name in ("DunePerfRegression", "HyTeg")

    return ["-fvara-disable-phasar"] if phasar_disabled else []


def get_threshold(project: VProject) -> int:
    """
    Return the project specific instrumentation threshold.

    Args:
        project: project that gets instrumented

    Returns:
        ``10`` for the listed synthetic test projects, ``0`` for all other
        projects of the TEST domain and for ``HyTeg``, ``100`` otherwise
    """
    if project.DOMAIN is ProjectDomains.TEST:
        # Don't instrument everything for these synthetic projects.
        coarsely_instrumented = project.name in (
            "SynthSAFieldSensitivity", "SynthIPRuntime", "SynthIPTemplate",
            "SynthIPTemplate2", "SynthIPCombined"
        )
        return 10 if coarsely_instrumented else 0

    return 0 if project.name in ("HyTeg",) else 100
+ ) -> None: + super().__init__(project=project) + self._binary = binary + self._report_file_ending = report_file_ending + self._file_name = file_name + self._reps = reps + + @abstractmethod + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + """Actual call implementation that gets a path to tmp_folder.""" + + +class MPRTimeReportAggregate( + MultiPatchReport[TimeReportAggregate], shorthand="MPRTRA", file_type=".zip" +): + """Multi-patch wrapper report for time aggregates.""" + + def __init__(self, path: Path) -> None: + super().__init__(path, TimeReportAggregate) + + +class MPRTEFAggregate( + MultiPatchReport[TEFReportAggregate], shorthand="MPRTEFA", file_type=".zip" +): + """Multi-patch wrapper report for tef aggregates.""" + + def __init__(self, path: Path) -> None: + super().__init__(path, TEFReportAggregate) + + +class MPRPIMAggregate( + MultiPatchReport[TEFReportAggregate], shorthand="MPRPIMA", file_type=".zip" +): + """Multi-patch wrapper report for tef aggregates.""" + + def __init__(self, path: Path) -> None: + # TODO: clean up report handling, we currently parse it as a TEFReport + # as the file looks similar + super().__init__( + path, + PerfInfluenceTraceReportAggregate # type: ignore + ) + + +class RunGenTracedWorkloads(AnalysisProjectStepBase): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." 
+ + project: VProject + + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + zip_tmp_dir = tmp_dir / self._file_name + with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: + for rep in range(0, self._reps): + for prj_command in perf_prec_workload_commands( + self.project, self._binary + ): + local_tracefile_path = Path(reps_tmp_dir) / ( + f"trace_{prj_command.command.label}_{rep}" + f".{self._report_file_ending}" + ) + with local.env(VARA_TRACE_FILE=local_tracefile_path): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + print( + f"Running example {prj_command.command.label}" + ) + + with cleanup(prj_command): + pb_cmd(retcode=self._binary.valid_exit_codes) + + return StepResult.OK + + +class RunBPFTracedWorkloads(AnalysisProjectStepBase): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunBPFTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." 
+ + project: VProject + + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Run instrumented code", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + zip_tmp_dir = tmp_dir / self._file_name + with tempfile.TemporaryDirectory() as non_nfs_tmp_dir: + with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir: + for rep in range(0, self._reps): + for prj_command in perf_prec_workload_commands( + self.project, self._binary + ): + local_tracefile_path = Path(reps_tmp_dir) / ( + f"trace_{prj_command.command.label}_{rep}" + f".{self._report_file_ending}" + ) + + with local.env( + VARA_TRACE_FILE=local_tracefile_path + ): + adapted_binary_location = Path( + non_nfs_tmp_dir + ) / self._binary.name + + pb_cmd = \ + prj_command.command.as_plumbum_wrapped_with( + adapted_binary_location= + adapted_binary_location, + project=self.project + ) + + bpf_runner = \ + self.attach_usdt_raw_tracing( + local_tracefile_path, + adapted_binary_location, + Path(non_nfs_tmp_dir) + ) + + with cleanup(prj_command): + print( + "Running example " + f"{prj_command.command.label}" + ) + pb_cmd( + retcode=self._binary.valid_exit_codes + ) + + # wait for bpf script to exit + if bpf_runner: + bpf_runner.wait() + + return StepResult.OK + + @staticmethod + def attach_usdt_raw_tracing( + report_file: Path, binary: Path, non_nfs_tmp_dir: Path + ) -> Future: + """Attach bpftrace script to binary to activate raw USDT probes.""" + orig_bpftrace_script_location = Path( + VaRA.install_location(), + "share/vara/perf_bpf_tracing/RawUsdtTefMarker.bt" + ) + # Store bpftrace script in a local tmp dir that is not on nfs + bpftrace_script_location = non_nfs_tmp_dir / "RawUsdtTefMarker.bt" + cp(orig_bpftrace_script_location, 
class RunBCCTracedWorkloads(AnalysisProjectStepBase):  # type: ignore
    """
    Runs the project binary on the selected workloads while a BCC script is
    attached to it to activate the USDT probes.

    Every run writes its trace into a zipped report folder below the output
    folder handed to the step.
    """

    NAME = "VaRARunBCCTracedBinaries"
    DESCRIPTION = "Run traced binary on workloads."

    project: VProject

    def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
        """Run the traced workloads and store the reports in ``tmp_dir``."""
        return self.run_traced_code(tmp_dir)

    def __str__(self, indent: int = 0) -> str:
        return textwrap.indent(
            f"* {self.project.name}: Run instrumented code", indent * " "
        )

    def run_traced_code(self, tmp_dir: Path) -> StepResult:
        """
        Runs the binary with the embedded tracing code.

        For every repetition and every selected workload a trace file is
        chosen inside the zipped report folder, the BCC script is attached
        to the binary, the workload is executed, and the step waits for the
        BCC script to exit.

        Args:
            tmp_dir: folder in which the zipped report folder is created

        Returns:
            ``StepResult.OK`` after all workloads were executed
        """
        with local.cwd(local.path(self.project.builddir)):
            zip_tmp_dir = tmp_dir / self._file_name
            with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir:
                for rep in range(0, self._reps):
                    for prj_command in perf_prec_workload_commands(
                        self.project, self._binary
                    ):
                        local_tracefile_path = Path(reps_tmp_dir) / (
                            f"trace_{prj_command.command.label}_{rep}"
                            f".{self._report_file_ending}"
                        )

                        with local.env(VARA_TRACE_FILE=local_tracefile_path):
                            pb_cmd = prj_command.command.as_plumbum(
                                project=self.project
                            )
                            print(
                                f"Running example {prj_command.command.label}"
                            )

                            # The tracer has to be attached before the
                            # workload is started.
                            bpf_runner = self.attach_usdt_bcc(
                                local_tracefile_path,
                                self.project.source_of_primary /
                                self._binary.path
                            )

                            with cleanup(prj_command):
                                pb_cmd(retcode=self._binary.valid_exit_codes)

                            # wait for bpf script to exit
                            if bpf_runner:
                                bpf_runner.wait()

        return StepResult.OK

    @staticmethod
    def attach_usdt_bcc(report_file: Path, binary: Path) -> Future:
        """
        Attach bcc script to binary to activate USDT probes.

        The script is started with ``sudo`` as a background process.

        Args:
            report_file: file that is passed to the script as output file
            binary: executable that is passed to the script

        Returns:
            the future of the background process running the script
        """
        bcc_script_location = Path(
            VaRA.install_location(),
            "share/vara/perf_bpf_tracing/UsdtTefMarker.py"
        )
        bcc_script = local[str(bcc_script_location)]

        # Assertion: Can be run without sudo password prompt.
        bcc_cmd = bcc_script["--output_file", report_file, "--no_poll",
                             "--executable", binary]
        print(f"{bcc_cmd=}")
        bcc_cmd = sudo[bcc_cmd]

        bcc_runner = bcc_cmd & BG
        sleep(3)  # give bcc script time to start up
        return bcc_runner
+ project.compile = get_default_compile_error_wrapped( + experiment.get_handle(), project, experiment.REPORT_SPEC.main_report + ) + + # TODO: change to multiple binaries + binary = select_project_binaries(project)[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + experiment.get_handle(), + experiment.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + patch_provider = PatchProvider.get_provider_for_project(type(project)) + patches = patch_provider.get_patches_for_revision( + ShortCommitHash(project.version_of_primary) + )[IDENTIFIER_PATCH_TAG] + + patch_steps = [] + for patch in patches: + patch_steps.append(ApplyPatch(project, patch)) + patch_steps.append(ReCompile(project)) + patch_steps.append( + analysis_step( + project, + binary, + file_name=MultiPatchReport.create_patched_report_name( + patch, "rep_measurements" + ) + ) + ) + patch_steps.append(RevertPatch(project, patch)) + + analysis_actions = get_config_patch_steps(project) + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + ZippedExperimentSteps( + result_filepath, [ + analysis_step( + project, + binary, + file_name=MultiPatchReport. + create_baseline_report_name("rep_measurements") + ) + ] + patch_steps + ) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions + + +class TEFProfileRunner(FeatureExperiment, shorthand="TEFp"): + """Test runner for feature performance.""" + + NAME = "RunTEFProfiler" + + REPORT_SPEC = ReportSpecification(MPRTEFAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. 
+ + Args: + project: to analyze + """ + return setup_actions_for_vara_experiment( + self, + project, + FeatureInstrType.TEF, + RunGenTracedWorkloads # type: ignore[type-abstract] + ) + + +class PIMProfileRunner(FeatureExperiment, shorthand="PIMp"): + """Test runner for feature performance.""" + + NAME = "RunPIMProfiler" + + REPORT_SPEC = ReportSpecification(MPRPIMAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + return setup_actions_for_vara_experiment( + self, + project, + FeatureInstrType.PERF_INFLUENCE_TRACE, + RunGenTracedWorkloads # type: ignore[type-abstract] + ) + + +class EbpfTraceTEFProfileRunner(FeatureExperiment, shorthand="ETEFp"): + """Test runner for feature performance.""" + + NAME = "RunEBPFTraceTEFProfiler" + + REPORT_SPEC = ReportSpecification(MPRTEFAggregate) + + CONTAINER = ContainerImage().run('apt', 'install', '-y', 'bpftrace') + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + return setup_actions_for_vara_experiment( + self, + project, + FeatureInstrType.USDT_RAW, + RunBPFTracedWorkloads # type: ignore[type-abstract] + ) + + +class BCCTEFProfileRunner(FeatureExperiment, shorthand="BCCp"): + """Test runner for feature performance.""" + + NAME = "RunBCCTEFProfiler" + + REPORT_SPEC = ReportSpecification(MPRTEFAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. 
class RunBlackBoxBaseline(OutputFolderStep):  # type: ignore
    """
    Runs the project binary on the selected workloads wrapped in ``time -v``.

    The time reports of all repetitions are collected in a zipped report
    folder below the output folder handed to the step.
    """

    NAME = "VaRARunTracedBinaries"
    DESCRIPTION = "Run traced binary on workloads."

    project: VProject

    def __init__(
        self,
        project: VProject,
        binary: ProjectBinaryWrapper,
        file_name: str,
        report_file_ending: str = "txt",
        reps: int = REPS
    ) -> None:
        """
        Args:
            project: project whose binary is measured
            binary: binary that is executed
            file_name: name of the zipped report folder
            report_file_ending: file ending of the written time reports
            reps: number of repetitions per workload
        """
        super().__init__(project=project)
        self.__binary = binary
        self.__report_file_ending = report_file_ending
        self.__reps = reps
        self.__file_name = file_name

    def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
        """Run the workloads and store the time reports in ``tmp_dir``."""
        return self.run_traced_code(tmp_dir)

    def __str__(self, indent: int = 0) -> str:
        description = f"* {self.project.name}: Run instrumented code"
        return textwrap.indent(description, indent * " ")

    def run_traced_code(self, tmp_dir: Path) -> StepResult:
        """
        Execute all selected workloads ``reps`` times under ``time -v``.

        Args:
            tmp_dir: folder in which the zipped report folder is created

        Returns:
            ``StepResult.OK`` after all workloads were executed
        """
        build_dir = local.path(self.project.builddir)

        with local.cwd(build_dir), ZippedReportFolder(
            tmp_dir / self.__file_name
        ) as reps_tmp_dir:
            report_dir = Path(reps_tmp_dir)

            for rep in range(self.__reps):
                workloads = perf_prec_workload_commands(
                    self.project, self.__binary
                )
                for prj_command in workloads:
                    label = prj_command.command.label
                    time_report_file = report_dir / (
                        f"baseline_{label}_{rep}.{self.__report_file_ending}"
                    )

                    print(f"Running example {label}")

                    with cleanup(prj_command):
                        timed_cmd = \
                            prj_command.command.as_plumbum_wrapped_with(
                                time["-v", "-o", time_report_file],
                                project=self.project
                            )
                        timed_cmd(retcode=self.__binary.valid_exit_codes)

        return StepResult.OK
ReportSpecification(MPRTimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + project.cflags += ["-flto", "-fuse-ld=lld", "-fno-omit-frame-pointer"] + + project.cflags += get_extra_cflags(project) + + project.ldflags += self.get_vara_tracing_ldflags() + + # Add the required runtime extensions to the project(s). + project.runtime_extension = bb_ext.run.RuntimeExtension( + project, self + ) << bb_ext.time.RunWithTime() + + # Add the required compiler extensions to the project(s). + project.compiler_extension = bb_ext.compiler.RunCompiler( + project, self + ) << WithUnlimitedStackSize() + + # Add own error handler to compile step. + project.compile = get_default_compile_error_wrapped( + self.get_handle(), project, self.REPORT_SPEC.main_report + ) + + # TODO: change to multiple binaries + binary = select_project_binaries(project)[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + self.get_handle(), + self.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + patch_provider = PatchProvider.get_provider_for_project(project) + patches = patch_provider.get_patches_for_revision( + ShortCommitHash(project.version_of_primary) + )[IDENTIFIER_PATCH_TAG] + print(f"{patches=}") + + patch_steps = [] + for patch in patches: + print(f"Got patch with path: {patch.path}") + patch_steps.append(ApplyPatch(project, patch)) + patch_steps.append(ReCompile(project)) + patch_steps.append( + RunBlackBoxBaseline( + project, + binary, + file_name=MPRTimeReportAggregate.create_patched_report_name( + patch, "rep_measurements" + ) + ) + ) + patch_steps.append(RevertPatch(project, patch)) + + analysis_actions = 
################################################################################
# Overhead computation
################################################################################


class RunGenTracedWorkloadsOverhead(AnalysisProjectStepBase):  # type: ignore
    """
    Runs the traced project binary on the selected workloads under
    ``time -v`` to measure the overhead of the embedded tracing code.

    The produced traces are not part of the result; only the time reports
    are written to the output folder.
    """

    NAME = "VaRARunTracedBinaries"
    DESCRIPTION = "Run traced binary on workloads."

    project: VProject

    def __init__(
        self,
        project: VProject,
        binary: ProjectBinaryWrapper,
        file_name: str,
        report_file_ending: str = "txt",
        reps: int = REPS
    ) -> None:
        """Same as the base class, but time reports default to ``txt``."""
        super().__init__(project, binary, file_name, report_file_ending, reps)

    def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
        """Run the workloads and store the time reports in ``tmp_dir``."""
        return self.run_traced_code(tmp_dir)

    def __str__(self, indent: int = 0) -> str:
        return textwrap.indent(
            f"* {self.project.name}: Run instrumented code", indent * " "
        )

    def run_traced_code(self, tmp_dir: Path) -> StepResult:
        """
        Runs the binary with the embedded tracing code.

        Args:
            tmp_dir: folder the time reports are written to

        Returns:
            ``StepResult.OK`` after all workloads were executed
        """
        with local.cwd(local.path(self.project.builddir)):
            # The traces are thrown away. A private temporary directory (as
            # used by RunBPFTracedWorkloadsOverhead) keeps parallel runs
            # from overwriting each other's files and removes them again.
            with tempfile.TemporaryDirectory() as trace_tmp_dir:
                for rep in range(0, self._reps):
                    for prj_command in perf_prec_workload_commands(
                        self.project, self._binary
                    ):
                        fake_tracefile_path = Path(trace_tmp_dir) / (
                            f"trace_{prj_command.command.label}_{rep}"
                            ".json"
                        )

                        time_report_file = tmp_dir / (
                            f"overhead_{prj_command.command.label}_{rep}"
                            f".{self._report_file_ending}"
                        )

                        with local.env(VARA_TRACE_FILE=fake_tracefile_path):
                            print(
                                f"Running example {prj_command.command.label}"
                            )

                            with cleanup(prj_command):
                                pb_cmd = \
                                    prj_command.command.as_plumbum_wrapped_with(
                                        time["-v", "-o", time_report_file],
                                        project=self.project
                                    )
                                pb_cmd(retcode=self._binary.valid_exit_codes)

        return StepResult.OK


class RunBPFTracedWorkloadsOverhead(AnalysisProjectStepBase):  # type: ignore
    """
    Runs the traced project binary on the selected workloads under
    ``time -v`` while a bpftrace script is attached to it.

    The produced traces are not part of the result; only the time reports
    are written to the output folder.
    """

    NAME = "VaRARunTracedBinaries"
    DESCRIPTION = "Run traced binary on workloads."

    project: VProject

    def __init__(
        self,
        project: VProject,
        binary: ProjectBinaryWrapper,
        file_name: str,
        report_file_ending: str = "txt",
        reps: int = REPS
    ) -> None:
        """Same as the base class, but time reports default to ``txt``."""
        super().__init__(project, binary, file_name, report_file_ending, reps)

    def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
        """Run the workloads and store the time reports in ``tmp_dir``."""
        return self.run_traced_code(tmp_dir)

    def __str__(self, indent: int = 0) -> str:
        return textwrap.indent(
            f"* {self.project.name}: Run instrumented code", indent * " "
        )

    def run_traced_code(self, tmp_dir: Path) -> StepResult:
        """
        Runs the binary with the embedded tracing code.

        Args:
            tmp_dir: folder the time reports are written to

        Returns:
            ``StepResult.OK`` after all workloads were executed
        """
        with local.cwd(local.path(self.project.builddir)):
            with tempfile.TemporaryDirectory() as non_nfs_tmp_dir:
                for rep in range(0, self._reps):
                    for prj_command in perf_prec_workload_commands(
                        self.project, self._binary
                    ):
                        base = Path(non_nfs_tmp_dir)
                        fake_tracefile_path = base / (
                            f"trace_{prj_command.command.label}_{rep}"
                            ".json"
                        )

                        time_report_file = tmp_dir / (
                            f"overhead_{prj_command.command.label}_{rep}"
                            f".{self._report_file_ending}"
                        )

                        with local.env(VARA_TRACE_FILE=fake_tracefile_path):
                            adapted_binary_location = Path(
                                non_nfs_tmp_dir
                            ) / self._binary.name

                            pb_cmd = \
                                prj_command.command.as_plumbum_wrapped_with(
                                    time["-v", "-o", time_report_file],
                                    adapted_binary_location,
                                    project=self.project
                                )

                            # The tracer has to be attached before the
                            # workload is started.
                            bpf_runner = \
                                RunBPFTracedWorkloads.attach_usdt_raw_tracing(
                                    fake_tracefile_path,
                                    adapted_binary_location,
                                    Path(non_nfs_tmp_dir)
                                )

                            with cleanup(prj_command):
                                print(
                                    "Running example "
                                    f"{prj_command.command.label}"
                                )
                                pb_cmd(retcode=self._binary.valid_exit_codes)

                            # wait for bpf script to exit
                            if bpf_runner:
                                bpf_runner.wait()

        return StepResult.OK


class RunBCCTracedWorkloadsOverhead(AnalysisProjectStepBase):  # type: ignore
    """
    Runs the traced project binary on the selected workloads under
    ``time -v`` while a BCC script is attached to it.

    The produced traces are not part of the result; only the time reports
    are written to the output folder.
    """

    NAME = "VaRARunTracedBinaries"
    DESCRIPTION = "Run traced binary on workloads."

    project: VProject

    def __init__(
        self,
        project: VProject,
        binary: ProjectBinaryWrapper,
        file_name: str,
        report_file_ending: str = "txt",
        reps: int = REPS
    ) -> None:
        """Same as the base class, but time reports default to ``txt``."""
        super().__init__(project, binary, file_name, report_file_ending, reps)

    def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
        """Run the workloads and store the time reports in ``tmp_dir``."""
        return self.run_traced_code(tmp_dir)

    def __str__(self, indent: int = 0) -> str:
        return textwrap.indent(
            f"* {self.project.name}: Run instrumented code", indent * " "
        )

    def run_traced_code(self, tmp_dir: Path) -> StepResult:
        """
        Runs the binary with the embedded tracing code.

        Args:
            tmp_dir: folder the time reports are written to

        Returns:
            ``StepResult.OK`` after all workloads were executed
        """
        with local.cwd(local.path(self.project.builddir)):
            # The traces are thrown away. A private temporary directory (as
            # used by RunBPFTracedWorkloadsOverhead) keeps parallel runs
            # from overwriting each other's files and removes them again.
            with tempfile.TemporaryDirectory() as trace_tmp_dir:
                for rep in range(0, self._reps):
                    for prj_command in perf_prec_workload_commands(
                        self.project, self._binary
                    ):
                        fake_tracefile_path = Path(trace_tmp_dir) / (
                            f"trace_{prj_command.command.label}_{rep}"
                            ".json"
                        )

                        time_report_file = tmp_dir / (
                            f"overhead_{prj_command.command.label}_{rep}"
                            f".{self._report_file_ending}"
                        )

                        with local.env(VARA_TRACE_FILE=fake_tracefile_path):
                            pb_cmd = prj_command.command.as_plumbum(
                                project=self.project
                            )
                            print(
                                f"Running example {prj_command.command.label}"
                            )

                            timed_pb_cmd = time["-v", "-o", time_report_file,
                                                "--", pb_cmd]

                            # The tracer has to be attached before the
                            # workload is started.
                            bpf_runner = RunBCCTracedWorkloads.attach_usdt_bcc(
                                fake_tracefile_path,
                                self.project.source_of_primary /
                                self._binary.path
                            )

                            with cleanup(prj_command):
                                timed_pb_cmd(
                                    retcode=self._binary.valid_exit_codes
                                )

                            # wait for bpf script to exit
                            if bpf_runner:
                                bpf_runner.wait()

        return StepResult.OK
+ project.compile = get_default_compile_error_wrapped( + experiment.get_handle(), project, experiment.REPORT_SPEC.main_report + ) + + # TODO: change to multiple binaries + binary = select_project_binaries(project)[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + experiment.get_handle(), + experiment.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + analysis_actions = get_config_patch_steps(project) + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + ZippedExperimentSteps( + result_filepath, + [ + analysis_step( # type: ignore + project, binary, "overhead" + ) + ] + ) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions + + +class TEFProfileOverheadRunner(FeatureExperiment, shorthand="TEFo"): + """Test runner for feature performance.""" + + NAME = "RunTEFProfilerO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + return setup_actions_for_vara_overhead_experiment( + self, project, FeatureInstrType.TEF, RunGenTracedWorkloadsOverhead + ) + + +class PIMProfileOverheadRunner(FeatureExperiment, shorthand="PIMo"): + """Test runner for feature performance.""" + + NAME = "RunPIMProfilerO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. 
+ + Args: + project: to analyze + """ + return setup_actions_for_vara_overhead_experiment( + self, project, FeatureInstrType.PERF_INFLUENCE_TRACE, + RunGenTracedWorkloadsOverhead + ) + + +class EbpfTraceTEFOverheadRunner(FeatureExperiment, shorthand="ETEFo"): + """Test runner for feature performance.""" + + NAME = "RunEBPFTraceTEFProfilerO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + CONTAINER = ContainerImage().run('apt', 'install', '-y', 'bpftrace') + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + return setup_actions_for_vara_overhead_experiment( + self, project, FeatureInstrType.USDT_RAW, + RunBPFTracedWorkloadsOverhead + ) + + +class BccTraceTEFOverheadRunner(FeatureExperiment, shorthand="BCCo"): + """Test runner for feature performance.""" + + NAME = "RunBCCTEFProfilerO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + return setup_actions_for_vara_overhead_experiment( + self, project, FeatureInstrType.USDT, RunBCCTracedWorkloadsOverhead + ) + + +class RunBlackBoxBaselineOverhead(OutputFolderStep): # type: ignore + """Executes the traced project binaries on the specified workloads.""" + + NAME = "VaRARunTracedBinaries" + DESCRIPTION = "Run traced binary on workloads." 
+ + project: VProject + + def __init__( + self, + project: VProject, + binary: ProjectBinaryWrapper, + report_file_ending: str = "txt", + reps: int = REPS + ) -> None: + super().__init__(project=project) + self.__binary = binary + self.__report_file_ending = report_file_ending + self.__reps = reps + + def call_with_output_folder(self, tmp_dir: Path) -> StepResult: + return self.run_traced_code(tmp_dir) + + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* {self.project.name}: Measure profiling overhead", indent * " " + ) + + def run_traced_code(self, tmp_dir: Path) -> StepResult: + """Runs the binary with the embedded tracing code.""" + with local.cwd(local.path(self.project.builddir)): + for rep in range(0, self.__reps): + for prj_command in perf_prec_workload_commands( + self.project, self.__binary + ): + time_report_file = tmp_dir / ( + f"overhead_{prj_command.command.label}_{rep}" + f".{self.__report_file_ending}" + ) + + with cleanup(prj_command): + print(f"Running example {prj_command.command.label}") + pb_cmd = prj_command.command.as_plumbum_wrapped_with( + time["-v", "-o", time_report_file], + project=self.project + ) + + pb_cmd(retcode=self.__binary.valid_exit_codes) + + return StepResult.OK + + +class BlackBoxOverheadBaseline(FeatureExperiment, shorthand="BBBaseO"): + """Test runner for feature performance.""" + + NAME = "GenBBBaselineO" + + REPORT_SPEC = ReportSpecification(TimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. + + Args: + project: to analyze + """ + project.cflags += ["-flto", "-fuse-ld=lld", "-fno-omit-frame-pointer"] + + project.cflags += get_extra_cflags(project) + + project.ldflags += self.get_vara_tracing_ldflags() + + # Add the required runtime extensions to the project(s). 
+ project.runtime_extension = bb_ext.run.RuntimeExtension( + project, self + ) << bb_ext.time.RunWithTime() + + # Add the required compiler extensions to the project(s). + project.compiler_extension = bb_ext.compiler.RunCompiler( + project, self + ) << WithUnlimitedStackSize() + + # Add own error handler to compile step. + project.compile = get_default_compile_error_wrapped( + self.get_handle(), project, self.REPORT_SPEC.main_report + ) + + # TODO: change to multiple binaries + binary = select_project_binaries(project)[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + result_filepath = create_new_success_result_filepath( + self.get_handle(), + self.get_handle().report_spec().main_report, project, binary, + get_current_config_id(project) + ) + + analysis_actions = get_config_patch_steps(project) + + analysis_actions.append(actions.Compile(project)) + analysis_actions.append( + ZippedExperimentSteps( + result_filepath, + [ + RunBlackBoxBaselineOverhead( # type: ignore + project, + binary + ), + ] + ) + ) + analysis_actions.append(actions.Clean(project)) + + return analysis_actions diff --git a/varats/varats/experiments/vara/feature_perf_runner.py b/varats/varats/experiments/vara/feature_perf_runner.py index ad6c3b424..5544bb7f2 100644 --- a/varats/varats/experiments/vara/feature_perf_runner.py +++ b/varats/varats/experiments/vara/feature_perf_runner.py @@ -42,7 +42,7 @@ def actions_for_project( project.cflags += self.get_vara_feature_cflags(project) project.cflags += self.get_vara_tracing_cflags( - instr_type, project=project + instr_type, project=project, instruction_threshold=0 ) project.ldflags += self.get_vara_tracing_ldflags() @@ -83,7 +83,9 @@ def actions_for_project( ) -> tp.MutableSequence[actions.Step]: project.cflags += self.get_vara_feature_cflags(project) - project.cflags += self.get_vara_tracing_cflags(FeatureInstrType.TEF) + project.cflags += self.get_vara_tracing_cflags( + 
FeatureInstrType.TEF, instruction_threshold=1 + ) project.cflags += [ "-fxray-instrument", diff --git a/varats/varats/jupyterhelper/file.py b/varats/varats/jupyterhelper/file.py index 1d1e1ee32..ddc9d54ab 100644 --- a/varats/varats/jupyterhelper/file.py +++ b/varats/varats/jupyterhelper/file.py @@ -18,6 +18,10 @@ SZZReport, PyDrillerSZZReport, ) +from varats.experiments.vara.feature_perf_precision import ( + MPRTimeReportAggregate, +) +from varats.report.tef_report import TEFReport def load_commit_report(file_path: PathLikeTy) -> CommitReport: @@ -113,3 +117,25 @@ def load_feature_analysis_report(file_path: PathLikeTy) -> \ file_path (Path): Full path to the file """ return VDM.load_data_class_sync(file_path, FeatureAnalysisReport) + + +def load_tef_report(file_path: PathLikeTy) -> TEFReport: + """ + Load a FeatureAnalysisReport from a file. + + Attributes: + file_path (Path): Full path to the file + """ + return VDM.load_data_class_sync(file_path, TEFReport) + + +def load_mpr_time_report_aggregate( + file_path: PathLikeTy +) -> MPRTimeReportAggregate: + """ + Load a MPRTimeReportAggregate from a file. 
+ + Attributes: + file_path (Path): Full path to the file + """ + return VDM.load_data_class_sync(file_path, MPRTimeReportAggregate) diff --git a/varats/varats/plots/feature_perf_precision.py b/varats/varats/plots/feature_perf_precision.py new file mode 100644 index 000000000..eee2a25cc --- /dev/null +++ b/varats/varats/plots/feature_perf_precision.py @@ -0,0 +1,426 @@ +"""Module for the FeaturePerfPrecision plots.""" +import typing as tp +from itertools import chain + +import matplotlib.colors as mcolors +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from matplotlib.axes import Axes +from matplotlib.text import Text + +from varats.data.databases.feature_perf_precision_database import ( + Profiler, + VXray, + PIMTracer, + EbpfTraceTEF, + load_precision_data, + load_overhead_data, +) +from varats.paper.paper_config import get_loaded_paper_config +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator +from varats.plots.scatter_plot_utils import multivariate_grid +from varats.utils.exceptions import UnsupportedOperation +from varats.utils.git_util import FullCommitHash + + +class PerfPrecisionPlot(Plot, plot_name='fperf_precision'): + """Precision plot that plots the precision and recall values of different + profilers.""" + + def plot(self, view_mode: bool) -> None: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] + + # Data aggregation + df = pd.DataFrame() + df = load_precision_data(case_studies, profilers) + df.sort_values(["CaseStudy"], inplace=True) + + grid = multivariate_grid( + df, + 'precision', + 'recall', + 'Profiler', + global_kde=False, + alpha=0.7, + legend=False, + s=100 + ) + grid.ax_marg_x.set_xlim(0.0, 1.02) + grid.ax_marg_y.set_ylim(0.0, 1.02) + grid.ax_joint.legend([name for name, _ in df.groupby("Profiler")]) + + grid.ax_joint.set_xlabel("Precision") + 
grid.ax_joint.set_ylabel("Recall") + grid.ax_joint.xaxis.label.set_size(20) + grid.ax_joint.yaxis.label.set_size(20) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise UnsupportedOperation + + +class PerfPrecisionPlotGenerator( + PlotGenerator, generator_name="fperf-precision", options=[] +): + """Generates precision plot.""" + + def generate(self) -> tp.List[Plot]: + + return [PerfPrecisionPlot(self.plot_config, **self.plot_kwargs)] + + +class PerfPrecisionDistPlot(Plot, plot_name='fperf_precision_dist'): + """Precision plot that plots the precision and recall distributions of + different profilers.""" + + def plot(self, view_mode: bool) -> None: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] + + # Data aggregation + df = pd.DataFrame() + df = load_precision_data(case_studies, profilers) + df.sort_values(["CaseStudy"], inplace=True) + df = df.melt( + id_vars=['CaseStudy', 'Patch', 'Profiler'], + value_vars=['precision', 'recall'], + var_name='metric', + value_name="value" + ) + + colors = sns.color_palette("Paired", len(profilers) * 2) + _, axes = plt.subplots(ncols=len(profilers), nrows=1, sharey=True) + + for idx, profiler in enumerate(profilers): + ax = axes[idx] + color_slice = colors[idx * 2:idx * 2 + 2] + data_slice = df[df['Profiler'] == profiler.name] + + sns.violinplot( + data=data_slice, + x='Profiler', + y='value', + hue='metric', + inner=None, + cut=0, + split=True, + palette=color_slice, + linewidth=1, + ax=ax + ) + + sns.stripplot( + data=data_slice, + x="Profiler", + y="value", + hue="metric", + jitter=0.15, + dodge=True, + linewidth=0.5, + marker='x', + palette=[ + mcolors.XKCD_COLORS['xkcd:dark grey'], + mcolors.CSS4_COLORS['dimgrey'] + ], + size=7, + ax=ax + ) + + ax.get_legend().remove() + + ax.set_ylabel(None) + ax.set_xlabel(None) + ax.tick_params(axis='x', labelsize=10, pad=8, length=6, width=1) + + 
if idx == 0: + ax.set_ylim(-0.1, 1.1) + ax.tick_params(axis='y', labelsize=10) + ax.tick_params(axis='y', width=1, length=3) + else: + ax.tick_params(left=False) + + plt.subplots_adjust(wspace=.0) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise UnsupportedOperation + + +class PerfProfDistPlotGenerator( + PlotGenerator, generator_name="fperf-precision-dist", options=[] +): + """Generates performance distribution plot.""" + + def generate(self) -> tp.List[Plot]: + return [PerfPrecisionDistPlot(self.plot_config, **self.plot_kwargs)] + + +class PerfOverheadPlot(Plot, plot_name='fperf_overhead'): + """Performance overhead plot that shows the pareto front of the different + performance metrics.""" + + def plot(self, view_mode: bool) -> None: + # -- Configure plot -- + plot_metric = [ + ("Time", "overhead_time_rel"), + ("Memory", "overhead_memory_rel"), + ] + extra_metrics = False + if extra_metrics: + plot_metric.extend([ + ("Major Page Faults", "overhead_major_page_faults_rel"), + ("Minor Page Faults", "overhead_minor_page_faults_rel"), + ("Filesystem Inputs", "overhead_fs_inputs_rel"), + ("Filesystem Outputs", "overhead_fs_outputs_rel"), + ]) + + target_row = "f1_score" + + # Load data + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] + + # Data aggregation + full_precision_df = load_precision_data(case_studies, profilers) + full_precision_df.sort_values(["CaseStudy"], inplace=True) + + precision_df = full_precision_df[[ + "CaseStudy", "precision", "recall", "Profiler", "f1_score" + ]] + precision_df = precision_df.groupby(['CaseStudy', "Profiler"], + as_index=False).agg({ + 'precision': 'mean', + 'recall': 'mean', + 'f1_score': 'mean' + }) + + overhead_df = load_overhead_data(case_studies, profilers) + overhead_df['overhead_time_rel'] = overhead_df['time'] / ( + overhead_df['time'] - overhead_df['overhead_time'] + ) * 100 + + 
overhead_df['overhead_memory_rel'] = overhead_df['memory'] / ( + overhead_df['memory'] - overhead_df['overhead_memory'] + ) * 100 + overhead_df['overhead_memory_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + + # Page faults + overhead_df['overhead_major_page_faults_rel' + ] = overhead_df['major_page_faults'] / ( + overhead_df['major_page_faults'] - + overhead_df['overhead_major_page_faults'] + ) * 100 + overhead_df['overhead_major_page_faults_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + + overhead_df['overhead_minor_page_faults_rel' + ] = overhead_df['minor_page_faults'] / ( + overhead_df['minor_page_faults'] - + overhead_df['overhead_minor_page_faults'] + ) * 100 + overhead_df['overhead_minor_page_faults_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + + # Filesystem + overhead_df['overhead_fs_inputs_rel'] = overhead_df['fs_inputs'] / ( + overhead_df['fs_inputs'] - overhead_df['overhead_fs_inputs'] + ) * 100 + overhead_df['overhead_fs_inputs_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + + overhead_df['overhead_fs_outputs_rel'] = overhead_df['fs_outputs'] / ( + overhead_df['fs_outputs'] - overhead_df['overhead_fs_outputs'] + ) * 100 + overhead_df['overhead_fs_outputs_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + + merged_df = pd.merge( + precision_df, overhead_df, on=["CaseStudy", "Profiler"] + ) + + rows = 1 + _, axes = plt.subplots( + ncols=int(len(plot_metric) / rows), nrows=rows, figsize=(30, 10) + ) + + if len(plot_metric) == 1: + self.do_single_plot( + plot_metric[0][1], target_row, merged_df, plot_metric[0][0], + axes + ) + else: + if rows == 1: + axes_list = list(axes) + else: + axes_list = list(chain.from_iterable(axes)) + + for idx, ax in enumerate(axes_list): + self.do_single_plot( + plot_metric[idx][1], target_row, merged_df, + plot_metric[idx][0], ax + ) + + def do_single_plot( + self, x_values_name: str, target_row: str, merged_df: pd.DataFrame, + plot_extra_name: str, ax: Axes + 
) -> None: + """Plot a single overhead metric.""" + sns.scatterplot( + merged_df, + x=x_values_name, + y=target_row, + hue="Profiler", + style='CaseStudy', + alpha=0.5, + s=300, + ax=ax + ) + + text_obj: Text + for text_obj in ax.legend().get_texts(): + + text_obj.set_fontsize("xx-large") + if text_obj.get_text() == "Profiler": + text_obj.set_text("Profilers") + text_obj.set_fontweight("bold") + + if text_obj.get_text() == "CaseStudy": + text_obj.set_text("Subject Systems") + text_obj.set_fontweight("bold") + + ax.set_xlabel(f"Relative {plot_extra_name}") + if target_row == "f1_score": + ax.set_ylabel("F1-Score") + + ax.set_ylim(0.0, 1.02) + # Sets the limit at least to 150 or otherwise to the largest non + # inf/nan value + x_limit = max( + np.max( + np.nan_to_num( + merged_df[x_values_name], + copy=True, + nan=0.0, + posinf=0.0, + neginf=0.0 + ) + ) + 20, 120 + ) + ax.set_xlim(x_limit, 100) + ax.tick_params(labelsize=20, pad=10) + ax.xaxis.label.set_fontsize(25) + ax.yaxis.label.set_fontsize(25) + ax.yaxis.labelpad = 10 + ax.xaxis.labelpad = 20 + + prof_df = merged_df[[ + 'Profiler', 'precision', x_values_name, 'f1_score' + ]].groupby('Profiler').agg(['mean', 'std']) + prof_df.fillna(0, inplace=True) + + pareto_front = self.plot_pareto_frontier( + prof_df[x_values_name]['mean'], + prof_df[target_row]['mean'], + max_x=False + ) + + pf_x = [pair[0] for pair in pareto_front] + pf_y = [pair[1] for pair in pareto_front] + + x_loc = prof_df[x_values_name]['mean'] + y_loc = prof_df[target_row]['mean'] + x_error = prof_df[x_values_name]['std'] + y_error = prof_df[target_row]['std'] + + ax.errorbar( + x_loc, + y_loc, + xerr=x_error, + yerr=y_error, + fmt='none', + color='grey', + zorder=0, + capsize=2, + capthick=0.6, + elinewidth=0.6 + ) + + sns.scatterplot( + prof_df, + x=(x_values_name, 'mean'), + y=(target_row, 'mean'), + hue="Profiler", + ax=ax, + legend=False, + s=300, + zorder=2 + ) + + sns.lineplot( + x=pf_x, + y=pf_y, + ax=ax, + color='firebrick', + legend=False, 
+ linewidth=3.5, + zorder=1 + ) + + def plot_pareto_frontier( + self, + x_values: tp.List[float], + y_values: tp.List[float], + max_x: bool = True, + max_y: bool = True + ) -> tp.List[tp.List[float]]: + """Pareto frontier selection process.""" + sorted_list = sorted([ + [x_values[i], y_values[i]] for i in range(len(x_values)) + ], + reverse=max_x) + pareto_front = [sorted_list[0]] + for pair in sorted_list[1:]: + if max_y: + if pair[1] >= pareto_front[-1][1]: + if pair[0] == pareto_front[-1][0]: + # If both points, have the same x-values, we should + # only keep the larger one + pareto_front[-1][1] = pair[1] + else: + pareto_front.append(pair) + else: + if pair[1] <= pareto_front[-1][1]: + if pair[0] == pareto_front[-1][0]: + # If both points, have the same x-values, we should + # only keep the smaller one + pareto_front[-1][1] = pair[1] + else: + pareto_front.append(pair) + + return pareto_front + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise UnsupportedOperation + + +class PerfOverheadPlotGenerator( + PlotGenerator, generator_name="fperf-overhead", options=[] +): + """Generates overhead plot.""" + + def generate(self) -> tp.List[Plot]: + return [PerfOverheadPlot(self.plot_config, **self.plot_kwargs)] diff --git a/varats/varats/projects/cpp_projects/hyteg.py b/varats/varats/projects/cpp_projects/hyteg.py index 62fee6190..ecd90ca5b 100644 --- a/varats/varats/projects/cpp_projects/hyteg.py +++ b/varats/varats/projects/cpp_projects/hyteg.py @@ -7,7 +7,6 @@ from benchbuild.command import WorkloadSet, SourceRoot from benchbuild.utils.cmd import ninja, cmake, mkdir from benchbuild.utils.revision_ranges import SingleRevision -from benchbuild.utils.settings import get_number_of_jobs from plumbum import local from varats.experiment.workload_util import WorkloadCategory, RSBinary @@ -21,8 +20,8 @@ from varats.project.sources import FeatureSource from varats.project.varats_command import VCommand from 
varats.project.varats_project import VProject +from varats.utils.git_commands import update_all_submodules from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap -from varats.utils.settings import bb_cfg LOG = logging.getLogger(__name__) @@ -109,6 +108,8 @@ def compile(self) -> None: mkdir("-p", hyteg_source / "build") + update_all_submodules(hyteg_source, recursive=True, init=True) + cc_compiler = bb.compiler.cc(self) cxx_compiler = bb.compiler.cxx(self) diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py index 2ef8e4338..07e1fadb0 100644 --- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py +++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py @@ -1332,6 +1332,17 @@ class SynthFeatureLargeConfigSpace(VProject): FeatureSource() ] + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot("SynthFeatureLargeConfigSpace") / + RSBinary("LargeConfigSpace"), + ConfigParams(), + label="RestrictedConfigSpace-no-input" + ) + ] + } + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 @@ -1379,6 +1390,17 @@ class SynthFeatureRestrictedConfigSpace(VProject): FeatureSource() ] + WORKLOADS = { + WorkloadSet(WorkloadCategory.EXAMPLE): [ + VCommand( + SourceRoot("SynthFeatureRestrictedConfigSpace") / + RSBinary("RestrictedConfigSpace"), + ConfigParams(), + label="RestrictedConfigSpace-no-input" + ) + ] + } + @staticmethod def binaries_for_revision( revision: ShortCommitHash # pylint: disable=W0613 diff --git a/varats/varats/tables/feature_perf_precision.py b/varats/varats/tables/feature_perf_precision.py new file mode 100644 index 000000000..b11c6baa6 --- /dev/null +++ b/varats/varats/tables/feature_perf_precision.py @@ -0,0 +1,538 @@ +"""Module for the FeaturePerfPrecision tables.""" +import re +import typing as tp +from pathlib import Path + +import matplotlib.pyplot as plt 
+import numpy as np +import numpy.typing as npt +import pandas as pd +from benchbuild.utils.cmd import git +from matplotlib import colors +from plumbum import local +from pylatex import Document, Package + +from varats.data.databases.feature_perf_precision_database import ( + get_patch_names, + get_regressing_config_ids_gt, + map_to_positive_config_ids, + map_to_negative_config_ids, + Profiler, + VXray, + PIMTracer, + EbpfTraceTEF, + compute_profiler_predictions, + load_precision_data, + load_overhead_data, +) +from varats.data.metrics import ConfusionMatrix +from varats.paper.case_study import CaseStudy +from varats.paper.paper_config import get_loaded_paper_config +from varats.project.project_domain import ProjectDomains +from varats.project.project_util import get_local_project_git_path +from varats.table.table import Table +from varats.table.table_utils import dataframe_to_table +from varats.table.tables import TableFormat, TableGenerator +from varats.utils.git_util import calc_repo_loc, ChurnConfig + + +def cmap_map( + function: tp.Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]], + cmap: colors.LinearSegmentedColormap +) -> colors.LinearSegmentedColormap: + """ + Applies function (which should operate on vectors of shape 3: [r, g, b]), on + colormap cmap. + + This routine will break any discontinuous points in a colormap. 
+ """ + # pylint: disable=protected-access + c_dict = cmap._segmentdata # type: ignore + # pylint: enable=protected-access + step_dict: tp.Dict[str, tp.List[tp.Any]] = {} + + # First get the list of points where the segments start or end + for key in ('red', 'green', 'blue'): + step_dict[key] = list(map(lambda x: x[0], c_dict[key])) + step_list = sum(step_dict.values(), []) + step_array = np.array(list(set(step_list))) + + # Then compute the LUT, and apply the function to the LUT + def reduced_cmap(step: np.float64) -> npt.NDArray: + return np.array(cmap(step)[0:3]) + + old_lut = np.array(list(map(reduced_cmap, step_array))) + new_lut = np.array(list(map(function, old_lut))) + + # Now try to make a minimal segment definition of the new LUT + c_dict = {} + for i, key in enumerate(['red', 'green', 'blue']): + this_c_dict = {} + for j, step in enumerate(step_array): + if step in step_dict[key]: + this_c_dict[step] = new_lut[j, i] + elif new_lut[j, i] != old_lut[j, i]: + this_c_dict[step] = new_lut[j, i] + colorvector = list(map(lambda x: x + (x[1],), this_c_dict.items())) + colorvector.sort() + c_dict[key] = colorvector + + return colors.LinearSegmentedColormap('colormap', c_dict, 1024) + + +class FeaturePerfPrecisionTable(Table, table_name="fperf_precision"): + """Table that compares the precision of different feature performance + measurement approaches.""" + + @staticmethod + def _prepare_data_table( + case_studies: tp.List[CaseStudy], profilers: tp.List[Profiler] + ) -> pd.DataFrame: + df = pd.DataFrame() + table_rows = [] + + for case_study in case_studies: + for patch_name in get_patch_names(case_study): + rev = case_study.revisions[0] + project_name = case_study.project_name + + ground_truth = get_regressing_config_ids_gt( + project_name, case_study, rev, patch_name + ) + + new_row = { + 'CaseStudy': + project_name, + 'Patch': + patch_name, + 'Configs': + len(case_study.get_config_ids_for_revision(rev)), + 'RegressedConfigs': + 
len(map_to_positive_config_ids(ground_truth)) + if ground_truth else -1 + } + + for profiler in profilers: + predicted = compute_profiler_predictions( + profiler, project_name, case_study, + case_study.get_config_ids_for_revision(rev), patch_name + ) + + if ground_truth and predicted: + results = ConfusionMatrix( + map_to_positive_config_ids(ground_truth), + map_to_negative_config_ids(ground_truth), + map_to_positive_config_ids(predicted), + map_to_negative_config_ids(predicted) + ) + new_row[f"{profiler.name}_precision" + ] = results.precision() + new_row[f"{profiler.name}_recall"] = results.recall() + new_row[f"{profiler.name}_baccuracy" + ] = results.balanced_accuracy() + else: + new_row[f"{profiler.name}_precision"] = np.nan + new_row[f"{profiler.name}_recall"] = np.nan + new_row[f"{profiler.name}_baccuracy"] = np.nan + + table_rows.append(new_row) + + return pd.concat([df, pd.DataFrame(table_rows)]) + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + """Setup performance precision table.""" + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer()] + + # Data aggregation + df = self._prepare_data_table(case_studies, profilers) + df.sort_values(["CaseStudy"], inplace=True) + print(f"{df=}") + + # insert totals + totals = { + 'CaseStudy': "Total (avg)", + "Configs": 0, + "RegressedConfigs": 0 + } + for profiler in profilers: + totals[f"{profiler.name}_precision"] = df[ + f"{profiler.name}_precision"].mean() + totals[f"{profiler.name}_recall"] = df[f"{profiler.name}_recall" + ].mean() + totals[f"{profiler.name}_baccuracy"] = df[ + f"{profiler.name}_baccuracy"].mean() + + tdf = pd.DataFrame(totals, index=[0]) + df = pd.concat([df, tdf], ignore_index=True) + + print(f"{df=}") + + symb_precision = "\\textsc{PPV}" + symb_recall = "\\textsc{TPR}" + symb_b_accuracy = "\\textsc{BA}" + symb_configs = "$\\mathbb{C}$" + symb_regressed_configs = "$\\mathbb{R}$" + + print(f"{df=}") + 
colum_setup = [(' ', 'CaseStudy'), (' ', 'Patch'), + ('', f'{symb_configs}'), + ('', f'{symb_regressed_configs}')] + for profiler in profilers: + colum_setup.append((profiler.name, f'{symb_precision}')) + colum_setup.append((profiler.name, f'{symb_recall}')) + colum_setup.append((profiler.name, f'{symb_b_accuracy}')) + + print(f"{colum_setup=}") + df.columns = pd.MultiIndex.from_tuples(colum_setup) + + print(f"{df=}") + + # Table config + style: pd.io.formats.style.Styler = df.style + kwargs: tp.Dict[str, tp.Any] = {} + if table_format.is_latex(): + kwargs["hrules"] = True + column_format = "l|rr" + for _ in profilers: + column_format += "|rrr" + kwargs["column_format"] = column_format + kwargs["multicol_align"] = "|c" + # pylint: disable=line-too-long + kwargs[ + "caption" + ] = f"""Localization precision of different performance profiling approaches to detect configuration-specific performance regression detection. +On the left, we show the amount of different configurations ({symb_configs}) analyzed and the amount of regressed configurations ({symb_regressed_configs}), determined through our baseline measurements. +Furthermore, the table depicts for each profiler, precision ({symb_precision}), recall ({symb_recall}), and balanced accuracy ({symb_b_accuracy}). 
+""" + # pylint: enable=line-too-long + style.format(precision=2) + style.hide() + + def add_extras(doc: Document) -> None: + doc.packages.append(Package("amsmath")) + doc.packages.append(Package("amssymb")) + + return dataframe_to_table( + df, + table_format, + style=style, + wrap_table=wrap_table, + wrap_landscape=True, + document_decorator=add_extras, + **kwargs + ) + + +class FeaturePerfPrecisionTableGenerator( + TableGenerator, generator_name="fperf-precision", options=[] +): + """Generator for `FeaturePerfPrecisionTable`.""" + + def generate(self) -> tp.List[Table]: + return [ + FeaturePerfPrecisionTable(self.table_config, **self.table_kwargs) + ] + + +def truncate_colormap( + cmap: colors.Colormap, + minval: float = 0.0, + maxval: float = 1.0, + n: int = 100 +) -> colors.LinearSegmentedColormap: + """ + Truncates a given color map to a specific range and number of elements. + + Args: + cmap: the original colormap + minval: smallest color value + maxval: largest color value + n: number of colors that should be in the map + + Returns: color map truncated to the given parameters + """ + new_cmap = colors.LinearSegmentedColormap.from_list( + f"trunc({cmap.name},{minval:.2f},{maxval:.2f})", + cmap(np.linspace(minval, maxval, n)) + ) + return new_cmap + + +class FeaturePerfOverheadComparisionTable(Table, table_name="fperf_overhead"): + """Table that compares overhead of different feature performance measurement + approaches.""" + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + """Setup performance overhead comparision table.""" + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] + + # Data aggregation + full_precision_df = load_precision_data(case_studies, profilers) + full_precision_df.sort_values(["CaseStudy"], inplace=True) + + precision_df = full_precision_df[[ + "CaseStudy", "precision", "recall", "Profiler", "f1_score" + ]] + # aggregate multiple 
revisions + precision_df = precision_df.groupby(['CaseStudy', "Profiler"], + as_index=False).agg({ + 'precision': 'mean', + 'recall': 'mean', + 'f1_score': 'mean' + }) + + overhead_df = load_overhead_data(case_studies, profilers) + overhead_df = overhead_df[[ + "CaseStudy", "Profiler", "time", "memory", "overhead_time", + "overhead_memory" + ]] + overhead_df['overhead_time_rel'] = overhead_df['time'] / ( + overhead_df['time'] - overhead_df['overhead_time'] + ) * 100 - 100 + + overhead_df['overhead_memory_rel'] = overhead_df['memory'] / ( + overhead_df['memory'] - overhead_df['overhead_memory'] + ) * 100 - 100 + overhead_df['overhead_memory_rel'].replace([np.inf, -np.inf], + np.nan, + inplace=True) + + # Merge with precision data + merged_df = pd.merge( + precision_df, overhead_df, on=["CaseStudy", "Profiler"] + ) + + pivot_df = merged_df.pivot( + index='CaseStudy', + columns='Profiler', + values=[ + 'precision', 'recall', 'overhead_time_rel', + 'overhead_memory_rel', 'overhead_memory' + ] + ) + + pivot_df = pivot_df.swaplevel(0, 1, 1).sort_index(axis=1) + + columns = [ + 'precision', 'recall', 'overhead_time_rel', 'overhead_memory_rel', + 'overhead_memory' + ] + pivot_df = pivot_df.reindex([ + (prof.name, c) for prof in profilers for c in columns + ], + axis=1) + + pivot_df.loc["Total"] = pivot_df.mean() + + # Rename columns + # pylint: disable=anomalous-backslash-in-string + overhead_time_c_name = "$\Delta$ Time $(\%)$" + overhead_memory_c_name = "$\Delta$ Mem $(\%)$" + overhead_memory_val_c_name = "$\Delta$ Mem $(Kbyte)$" + # pylint: enable=anomalous-backslash-in-string + pivot_df = pivot_df.rename( + columns={ + "precision": "Precision", + "recall": "Recall", + "overhead_time_rel": overhead_time_c_name, + "overhead_memory_rel": overhead_memory_c_name, + "overhead_memory": overhead_memory_val_c_name, + } + ) + + style: pd.io.formats.style.Styler = pivot_df.style + kwargs: tp.Dict[str, tp.Any] = {} + + def add_extras(doc: Document) -> None: + 
doc.packages.append(Package("amsmath")) + doc.packages.append(Package("amssymb")) + + if table_format.is_latex(): + mv_columns = [ + (prof.name, overhead_memory_val_c_name) for prof in profilers + ] + style.format({col: "{:.0f}" for col in mv_columns}, precision=2) + + ryg_map = cmap_map( + lambda x: x / 1.2 + 0.2, + tp.cast(colors.LinearSegmentedColormap, plt.get_cmap('RdYlGn')) + ) + + style.background_gradient( + cmap=ryg_map, + subset=[(prof.name, 'Precision') for prof in profilers], + vmin=0.0, + vmax=1.0, + ) + style.background_gradient( + cmap=ryg_map, + subset=[(prof.name, 'Recall') for prof in profilers], + vmin=0.0, + vmax=1.0, + ) + + gray_map = plt.get_cmap('binary') + gray_map = truncate_colormap(gray_map, 0, 0.6, 200) + style.background_gradient( + cmap=gray_map, + subset=[(prof.name, overhead_time_c_name) for prof in profilers + ], + vmin=0.0, + vmax=100.0, + ) + + style.background_gradient( + cmap=gray_map, + subset=[ + (prof.name, overhead_memory_c_name) for prof in profilers + ], + vmin=0.0, + vmax=100.0, + ) + + kwargs["convert_css"] = True + kwargs["column_format"] = "l" + "".join([ + "rrrrr" for _ in profilers + ]) + kwargs["hrules"] = True + kwargs["multicol_align"] = "c" + + return dataframe_to_table( + data=pivot_df, + table_format=table_format, + style=style, + wrap_table=wrap_table, + wrap_landscape=True, + document_decorator=add_extras, + **kwargs + ) + + +class FeaturePerfOverheadComparisionTableGenerator( + TableGenerator, generator_name="fperf-overhead-comp", options=[] +): + """Generator for `FeaturePerfOverheadTable`.""" + + def generate(self) -> tp.List[Table]: + return [ + FeaturePerfOverheadComparisionTable( + self.table_config, **self.table_kwargs + ) + ] + + +class FeaturePerfMetricsOverviewTable(Table, table_name="fperf_overview"): + """Table showing some general information about feature performance case + studies.""" + + @staticmethod + def _calc_folder_locs(repo_path: Path, rev_range: str, folder: str) -> int: + churn_config 
= ChurnConfig.create_c_style_languages_config() + file_pattern = re.compile( + "|".join(churn_config.get_extensions_repr(r"^.*\.", r"$")) + ) + + loc: int = 0 + with local.cwd(repo_path): + files = git( + "ls-tree", + "-r", + "--name-only", + rev_range, + ).splitlines() + + for file in files: + if not file.startswith(folder): + continue + if file_pattern.match(file): + lines = git("show", f"{rev_range}:{file}").splitlines() + loc += len([line for line in lines if line]) + + return loc + + @staticmethod + def _calc_folder_locs_dune(repo_path: Path, rev_range: str) -> int: + dune_sub_projects = [ + "dune-alugrid", "dune-common", "dune-functions", "dune-geometry", + "dune-grid", "dune-istl", "dune-localfunctions", + "dune-multidomaingrid", "dune-pdelab", "dune-typetree", + "dune-uggrid" + ] + total_locs = 0 + + total_locs += calc_repo_loc(repo_path, rev_range) + + for sub_project in dune_sub_projects: + sub_project_path = repo_path / sub_project + locs = calc_repo_loc(sub_project_path, "HEAD") + total_locs += locs + + return total_locs + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + case_studies = get_loaded_paper_config().get_all_case_studies() + profilers: tp.List[Profiler] = [VXray(), PIMTracer(), EbpfTraceTEF()] + + df_precision = load_precision_data(case_studies, profilers) + + cs_data: tp.List[pd.DataFrame] = [] + for case_study in case_studies: + project_name = case_study.project_name + rev = case_study.revisions[0] + project_git_path = get_local_project_git_path(project_name) + + cs_precision_data = df_precision[df_precision['CaseStudy'] == + project_name] + regressions = len(cs_precision_data['Patch'].unique()) + + locs: int + if case_study.project_cls.DOMAIN == ProjectDomains.TEST: + src_folder = f'projects/{project_name}' + if src_folder.endswith( + "projects/SynthCTTemplateSpecialization" + ): + src_folder = "projects/SynthCTSpecialization" + locs = self._calc_folder_locs( + project_git_path, rev.hash, src_folder + ) + elif 
case_study.project_cls.NAME == "DunePerfRegression": + locs = self._calc_folder_locs_dune(project_git_path, rev.hash) + else: + locs = calc_repo_loc(project_git_path, rev.hash) + + cs_dict = { + project_name: { + "NumConfig": + len(case_study.get_config_ids_for_revision(rev)), + "Locs": + locs, + "Regressions": + regressions, + } + } + + cs_data.append(pd.DataFrame.from_dict(cs_dict, orient='index')) + + df = pd.concat(cs_data).sort_index() + + style = df.style + kwargs: tp.Dict[str, tp.Any] = {} + if table_format.is_latex(): + kwargs["hrules"] = True + style.format(thousands=r"\,") + return dataframe_to_table(df, table_format, style, wrap_table, **kwargs) + + +class FeaturePerfMetricsOverviewTableGenerator( + TableGenerator, generator_name="fperf-overview", options=[] +): + """Generates a cs-metrics table for the selected case study(ies).""" + + def generate(self) -> tp.List[Table]: + return [ + FeaturePerfMetricsOverviewTable( + self.table_config, **self.table_kwargs + ) + ] diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py index 482f8ae02..4999b9f48 100644 --- a/varats/varats/tools/bb_config.py +++ b/varats/varats/tools/bb_config.py @@ -115,6 +115,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None: 'varats.experiments.vara.feature_perf_runner', 'varats.experiments.vara.feature_perf_sampling', 'varats.experiments.vara.feature_perf_tracing', + 'varats.experiments.vara.feature_perf_precision', 'varats.experiments.vara.feature_tracing_stats', 'varats.experiments.vara.feature_instrumentation_points', 'varats.experiments.vara.instrumentation_verifier',