Skip to content

Commit

Permalink
Simplify BlackBox Experiments for FeaturePerfPrecision (#887)
Browse files Browse the repository at this point in the history
The current implementations of the black box experiments for the feature perf precision experiments are basically equivalent to the setup_actions_for_vara_experiment/setup_actions_for_vara_overhead_experiment methods.

With some minor changes, we can avoid this code duplication.

Co-authored-by: Florian Sattler <[email protected]>
  • Loading branch information
2 people authored and Sinerum committed Sep 3, 2024
1 parent 88fb16d commit d488352
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 153 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ class EbpfTraceTEF(Profiler):
def __init__(self) -> None:
super().__init__(
"eBPFTrace", fpp.EbpfTraceTEFProfileRunner,
fpp.TEFProfileOverheadRunner, fpp.MPRTEFAggregate
fpp.EbpfTraceTEFOverheadRunner, fpp.MPRTEFAggregate
)

def is_regression(
Expand Down
177 changes: 32 additions & 145 deletions varats/varats/experiments/vara/feature_perf_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,16 @@ def select_project_binaries(project: VProject) -> tp.List[ProjectBinaryWrapper]:

def get_extra_cflags(project: VProject) -> tp.List[str]:
"""Get additional cflags for some projects."""
extra_flags = []

if project.name == "DunePerfRegression":
extra_flags += ["-pthread"]

if project.name in ["DunePerfRegression", "HyTeg"]:
# Disable phasar for dune as the analysis cannot handle dune's size
return ["-fvara-disable-phasar"]
extra_flags += ["-fvara-disable-phasar"]

return []
return extra_flags


def get_threshold(project: VProject) -> int:
Expand Down Expand Up @@ -399,9 +404,11 @@ def attach_usdt_bcc(report_file: Path, binary: Path) -> Future:


def setup_actions_for_vara_experiment(
experiment: FeatureExperiment, project: VProject,
experiment: FeatureExperiment,
project: VProject,
instr_type: FeatureInstrType,
analysis_step: tp.Type[AnalysisProjectStepBaseTy]
analysis_step: tp.Type[AnalysisProjectStepBaseTy],
report_type=MultiPatchReport
) -> tp.MutableSequence[actions.Step]:
"""Sets up actions for a given perf precision experiment."""

Expand Down Expand Up @@ -458,7 +465,7 @@ def setup_actions_for_vara_experiment(
analysis_step(
project,
binary,
file_name=MultiPatchReport.create_patched_report_name(
file_name=report_type.create_patched_report_name(
patch, "rep_measurements"
)
)
Expand All @@ -474,7 +481,7 @@ def setup_actions_for_vara_experiment(
analysis_step(
project,
binary,
file_name=MultiPatchReport.
file_name=report_type.
create_baseline_report_name("rep_measurements")
)
] + patch_steps
Expand Down Expand Up @@ -587,7 +594,7 @@ def actions_for_project(
)


class RunBlackBoxBaseline(OutputFolderStep): # type: ignore
class RunBlackBoxBaseline(AnalysisProjectStepBase): # type: ignore
"""Executes the traced project binaries on the specified workloads."""

NAME = "VaRARunTracedBinaries"
Expand All @@ -603,11 +610,7 @@ def __init__(
report_file_ending: str = "txt",
reps: int = REPS
) -> None:
super().__init__(project=project)
self.__binary = binary
self.__report_file_ending = report_file_ending
self.__reps = reps
self.__file_name = file_name
super().__init__(project, binary, file_name, report_file_ending, reps)

def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
return self.run_traced_code(tmp_dir)
Expand All @@ -620,15 +623,15 @@ def __str__(self, indent: int = 0) -> str:
def run_traced_code(self, tmp_dir: Path) -> StepResult:
"""Runs the binary with the embedded tracing code."""
with local.cwd(local.path(self.project.builddir)):
zip_tmp_dir = tmp_dir / self.__file_name
zip_tmp_dir = tmp_dir / self._file_name
with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir:
for rep in range(0, self.__reps):
for rep in range(0, self._reps):
for prj_command in perf_prec_workload_commands(
self.project, self.__binary
self.project, self._binary
):
time_report_file = Path(reps_tmp_dir) / (
f"baseline_{prj_command.command.label}_{rep}"
f".{self.__report_file_ending}"
f".{self._report_file_ending}"
)

print(f"Running example {prj_command.command.label}")
Expand All @@ -639,7 +642,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult:
time["-v", "-o", time_report_file],
project=self.project
)
pb_cmd(retcode=self.__binary.valid_exit_codes)
pb_cmd(retcode=self._binary.valid_exit_codes)

return StepResult.OK

Expand All @@ -661,78 +664,10 @@ def actions_for_project(
Args:
project: to analyze
"""
project.cflags += ["-flto", "-fuse-ld=lld", "-fno-omit-frame-pointer"]

project.cflags += get_extra_cflags(project)

project.ldflags += self.get_vara_tracing_ldflags()

# Add the required runtime extensions to the project(s).
project.runtime_extension = bb_ext.run.RuntimeExtension(
project, self
) << bb_ext.time.RunWithTime()

# Add the required compiler extensions to the project(s).
project.compiler_extension = bb_ext.compiler.RunCompiler(
project, self
) << WithUnlimitedStackSize()

# Add own error handler to compile step.
project.compile = get_default_compile_error_wrapped(
self.get_handle(), project, self.REPORT_SPEC.main_report
)

# TODO: change to multiple binaries
binary = select_project_binaries(project)[0]
if binary.type != BinaryType.EXECUTABLE:
raise AssertionError("Experiment only works with executables.")

result_filepath = create_new_success_result_filepath(
self.get_handle(),
self.get_handle().report_spec().main_report, project, binary,
get_current_config_id(project)
)

patch_provider = PatchProvider.get_provider_for_project(project)
patches = patch_provider.get_patches_for_revision(
ShortCommitHash(project.version_of_primary)
)[IDENTIFIER_PATCH_TAG]
print(f"{patches=}")

patch_steps = []
for patch in patches:
print(f"Got patch with path: {patch.path}")
patch_steps.append(ApplyPatch(project, patch))
patch_steps.append(ReCompile(project))
patch_steps.append(
RunBlackBoxBaseline(
project,
binary,
file_name=MPRTimeReportAggregate.create_patched_report_name(
patch, "rep_measurements"
)
)
)
patch_steps.append(RevertPatch(project, patch))

analysis_actions = get_config_patch_steps(project)

analysis_actions.append(actions.Compile(project))
analysis_actions.append(
ZippedExperimentSteps(
result_filepath, [
RunBlackBoxBaseline(
project,
binary,
file_name=MPRTimeReportAggregate.
create_baseline_report_name("rep_measurements")
)
] + patch_steps
)
return setup_actions_for_vara_experiment(
self, project, FeatureInstrType.NONE, RunBlackBoxBaseline,
MPRTimeReportAggregate
)
analysis_actions.append(actions.Clean(project))

return analysis_actions


################################################################################
Expand Down Expand Up @@ -1097,7 +1032,7 @@ def actions_for_project(
)


class RunBlackBoxBaselineOverhead(OutputFolderStep): # type: ignore
class RunBlackBoxBaselineOverhead(AnalysisProjectStepBase): # type: ignore
"""Executes the traced project binaries on the specified workloads."""

NAME = "VaRARunTracedBinaries"
Expand All @@ -1109,13 +1044,11 @@ def __init__(
self,
project: VProject,
binary: ProjectBinaryWrapper,
file_name: str,
report_file_ending: str = "txt",
reps: int = REPS
) -> None:
super().__init__(project=project)
self.__binary = binary
self.__report_file_ending = report_file_ending
self.__reps = reps
super().__init__(project, binary, file_name, report_file_ending, reps)

def call_with_output_folder(self, tmp_dir: Path) -> StepResult:
return self.run_traced_code(tmp_dir)
Expand All @@ -1128,13 +1061,13 @@ def __str__(self, indent: int = 0) -> str:
def run_traced_code(self, tmp_dir: Path) -> StepResult:
"""Runs the binary with the embedded tracing code."""
with local.cwd(local.path(self.project.builddir)):
for rep in range(0, self.__reps):
for rep in range(0, self._reps):
for prj_command in perf_prec_workload_commands(
self.project, self.__binary
self.project, self._binary
):
time_report_file = tmp_dir / (
f"overhead_{prj_command.command.label}_{rep}"
f".{self.__report_file_ending}"
f".{self._report_file_ending}"
)

with cleanup(prj_command):
Expand All @@ -1144,7 +1077,7 @@ def run_traced_code(self, tmp_dir: Path) -> StepResult:
project=self.project
)

pb_cmd(retcode=self.__binary.valid_exit_codes)
pb_cmd(retcode=self._binary.valid_exit_codes)

return StepResult.OK

Expand All @@ -1166,52 +1099,6 @@ def actions_for_project(
Args:
project: to analyze
"""
project.cflags += ["-flto", "-fuse-ld=lld", "-fno-omit-frame-pointer"]

project.cflags += get_extra_cflags(project)

project.ldflags += self.get_vara_tracing_ldflags()

# Add the required runtime extensions to the project(s).
project.runtime_extension = bb_ext.run.RuntimeExtension(
project, self
) << bb_ext.time.RunWithTime()

# Add the required compiler extensions to the project(s).
project.compiler_extension = bb_ext.compiler.RunCompiler(
project, self
) << WithUnlimitedStackSize()

# Add own error handler to compile step.
project.compile = get_default_compile_error_wrapped(
self.get_handle(), project, self.REPORT_SPEC.main_report
)

# TODO: change to multiple binaries
binary = select_project_binaries(project)[0]
if binary.type != BinaryType.EXECUTABLE:
raise AssertionError("Experiment only works with executables.")

result_filepath = create_new_success_result_filepath(
self.get_handle(),
self.get_handle().report_spec().main_report, project, binary,
get_current_config_id(project)
)

analysis_actions = get_config_patch_steps(project)

analysis_actions.append(actions.Compile(project))
analysis_actions.append(
ZippedExperimentSteps(
result_filepath,
[
RunBlackBoxBaselineOverhead( # type: ignore
project,
binary
),
]
)
return setup_actions_for_vara_overhead_experiment(
self, project, FeatureInstrType.NONE, RunBlackBoxBaselineOverhead
)
analysis_actions.append(actions.Clean(project))

return analysis_actions
25 changes: 18 additions & 7 deletions varats/varats/tables/feature_perf_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@
from varats.table.table import Table
from varats.table.table_utils import dataframe_to_table
from varats.table.tables import TableFormat, TableGenerator
from varats.ts_utils.cli_util import make_cli_option
from varats.utils.git_util import calc_repo_loc, ChurnConfig

GROUP_SYNTHETIC_CATEGORIES = True

SYNTH_CATEGORIES = [
"Static Analysis", "Dynamic Analysis", "Configurability",
"Implementation Pattern"
Expand Down Expand Up @@ -251,8 +250,18 @@ def add_extras(doc: Document) -> None:
)


GROUP_SYNTHETIC_OPTION = make_cli_option(
"--group-synth",
type=bool,
default=False,
help="Group synthetic case studies in tables."
)


class FeaturePerfPrecisionTableGenerator(
TableGenerator, generator_name="fperf-precision", options=[]
TableGenerator,
generator_name="fperf-precision",
options=[GROUP_SYNTHETIC_OPTION]
):
"""Generator for `FeaturePerfPrecisionTable`."""

Expand Down Expand Up @@ -331,7 +340,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
precision_df, overhead_df, on=["CaseStudy", "Profiler"]
)

if GROUP_SYNTHETIC_CATEGORIES:
if self.table_kwargs["group_synth"]:

merged_df["CaseStudy"] = merged_df["CaseStudy"].apply(
compute_cs_category_grouping
Expand Down Expand Up @@ -368,7 +377,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:

# All means need to be computed before they are added as rows
overall_mean = pivot_df.mean()
if GROUP_SYNTHETIC_CATEGORIES:
if self.table_kwargs["group_synth"]:
synth_mean = pivot_df.loc[pivot_df.index.isin(SYNTH_CATEGORIES)
].mean()
real_world_mean = pivot_df.loc[~pivot_df.index.
Expand Down Expand Up @@ -464,7 +473,9 @@ def add_extras(doc: Document) -> None:


class FeaturePerfOverheadComparisionTableGenerator(
TableGenerator, generator_name="fperf-overhead-comp", options=[]
TableGenerator,
generator_name="fperf-overhead-comp",
options=[GROUP_SYNTHETIC_OPTION]
):
"""Generator for `FeaturePerfOverheadTable`."""

Expand Down Expand Up @@ -571,7 +582,7 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
df = pd.concat(cs_data).sort_index()
df.index.name = 'CaseStudy'

if GROUP_SYNTHETIC_CATEGORIES:
if self.table_kwargs["group_synth"]:
df.index = df.index.map(compute_cs_category_grouping)

df = df.groupby(df.index.name, as_index=True).agg({
Expand Down

0 comments on commit d488352

Please sign in to comment.