From 1b2b0bd358153ad287b82f7b9da482a6eaa3d939 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Sat, 27 Apr 2024 12:09:55 +0200 Subject: [PATCH 1/6] Implements experiment to detect the hot functions of a project --- .../varats/report/hot_functions_report.py | 98 ++++++++++ .../vara/hot_function_experiment.py | 175 ++++++++++++++++++ varats/varats/projects/c_projects/xz.py | 2 +- varats/varats/tables/hot_functions.py | 94 ++++++++++ varats/varats/tools/bb_config.py | 1 + 5 files changed, 369 insertions(+), 1 deletion(-) create mode 100644 varats-core/varats/report/hot_functions_report.py create mode 100644 varats/varats/experiments/vara/hot_function_experiment.py create mode 100644 varats/varats/tables/hot_functions.py diff --git a/varats-core/varats/report/hot_functions_report.py b/varats-core/varats/report/hot_functions_report.py new file mode 100644 index 000000000..4743dc394 --- /dev/null +++ b/varats-core/varats/report/hot_functions_report.py @@ -0,0 +1,98 @@ +import typing as tp +from dataclasses import dataclass +from pathlib import Path + +from pandas import read_csv + +from varats.experiment.workload_util import WorkloadSpecificReportAggregate +from varats.report.report import BaseReport, ReportAggregate + + +@dataclass +class XRayFunctionWrapper: + name: str + count: int + sum_time: float + + +class HotFunctionReport(BaseReport, shorthand="HFR", file_type=".csv"): + + MAX_TRACK_FUNCTIONS = 50 + + def __init__(self, path: Path) -> None: + super().__init__(path) + self.__function_data = read_csv(path) + + def top_n_functions(self, limit=10) -> tp.List[XRayFunctionWrapper]: + self.__function_data.sort_values( + by='sum', ascending=False, inplace=True + ) + return [ + XRayFunctionWrapper( + name=row["function"], count=row['count'], sum_time=row["sum"] + ) for _, row in self.__function_data.head(limit).iterrows() + ] + + def hot_functions(self, threshold=2) -> tp.List[XRayFunctionWrapper]: + """ + Args: + threshold: min percentage a function needs as self 
time to count as hot + """ + if threshold < 0 or threshold > 100: + raise ValueError( + "Threshold value needs to be in the range [0,...,100] " + f"but was {threshold}" + ) + + self.__function_data.sort_values( + by='sum', ascending=False, inplace=True + ) + # The total time tracked only includes time spend in the top n + # (MAX_TRACK_FUNCTIONS) functions + total_time_tracked = self.__function_data["sum"].sum() + + if threshold == 0: + sum_time_cutoff = 0 + else: + sum_time_cutoff = (total_time_tracked * threshold) / 100 + + return [ + XRayFunctionWrapper( + name=row["function"], count=row['count'], sum_time=row["sum"] + ) + for _, row in self.__function_data.iterrows() + if row["sum"] > sum_time_cutoff + ] + + def print_full_dump(self) -> None: + print(f"{self.__function_data}") + + +class WLHotFunctionAggregate( + WorkloadSpecificReportAggregate[HotFunctionReport], + shorthand="WL" + HotFunctionReport.SHORTHAND + ReportAggregate.SHORTHAND, + file_type=ReportAggregate.FILE_TYPE +): + + def __init__(self, path: Path) -> None: + super().__init__(path, HotFunctionReport) + + def dump_all_reports(self) -> None: + for wl_name in self.workload_names(): + for report in self.reports(wl_name): + report.print_full_dump() + + def hot_functions_per_workload( + self, threshold=2 + ) -> tp.Dict[str, tp.List[XRayFunctionWrapper]]: + """ + Args: + threshold: min percentage a function needs as self time to count as hot + """ + res: tp.Dict[str, tp.List[XRayFunctionWrapper]] = {} + for wl_name in self.workload_names(): + # TODO: repetition handling + for report in self.reports(wl_name): + res[wl_name] = report.hot_functions(threshold=threshold) + + return res diff --git a/varats/varats/experiments/vara/hot_function_experiment.py b/varats/varats/experiments/vara/hot_function_experiment.py new file mode 100644 index 000000000..653917ddc --- /dev/null +++ b/varats/varats/experiments/vara/hot_function_experiment.py @@ -0,0 +1,175 @@ +import typing as tp +from pathlib import Path + 
+from benchbuild.command import ProjectCommand, cleanup +from benchbuild.extensions import compiler, run, time +from benchbuild.utils import actions +from plumbum import local +from plumbum.cmd import llvm_xray + +from varats.experiment.experiment_util import ( + ZippedReportFolder, + create_new_success_result_filepath, + get_default_compile_error_wrapped, + ExperimentHandle, +) +from varats.experiment.workload_util import WorkloadCategory, workload_commands +from varats.experiments.vara.feature_experiment import FeatureExperiment +from varats.experiments.vara.feature_perf_precision import ( + select_project_binaries, +) +from varats.project.project_util import BinaryType, ProjectBinaryWrapper +from varats.project.varats_project import VProject +from varats.report.hot_functions_report import ( + HotFunctionReport, + WLHotFunctionAggregate, +) +from varats.report.report import ReportSpecification +from varats.utils.config import get_current_config_id + + +def perf_prec_workload_commands( + project: VProject, binary: ProjectBinaryWrapper +) -> tp.List[ProjectCommand]: + """Uniformly select the workloads that should be processed.""" + + wl_commands = [] + + if not project.name.startswith( + "SynthIP" + ) and project.name != "SynthSAFieldSensitivity": + # Example commands from these CS are too "fast" + wl_commands += workload_commands( + project, binary, [WorkloadCategory.EXAMPLE] + ) + + wl_commands += workload_commands(project, binary, [WorkloadCategory.SMALL]) + + wl_commands += workload_commands(project, binary, [WorkloadCategory.MEDIUM]) + + return wl_commands + + +class RunXRayProfiler(actions.ProjectStep): + """Profiling step that runs a XRay instrumented binary to extract function- + level measurement data.""" + + NAME = "RunInstrumentedXRayBinaries" + DESCRIPTION = "Profile a project that was instrumented \ + with xray instrumentations." 
+ + project: VProject + + def __init__( + self, project: VProject, experiment_handle: ExperimentHandle + ) -> None: + super().__init__(project=project) + self.__experiment_handle = experiment_handle + + def __call__(self) -> actions.StepResult: + return self.run_instrumented_code() + + def __str__(self, indent: int = 0) -> str: + return actions.textwrap.indent( + f"* {self.project.name}: Run VaRA measurements together with XRay", + indent * " " + ) + + def run_instrumented_code(self) -> actions.StepResult: + for binary in self.project.binaries: + if binary.type != BinaryType.EXECUTABLE: + # Skip libraries as we cannot run them + continue + + with local.cwd(local.path(self.project.builddir)): + + result_filepath = create_new_success_result_filepath( + exp_handle=self.__experiment_handle, + report_type=self.__experiment_handle.report_spec(). + main_report, + project=self.project, + binary=binary, + config_id=get_current_config_id(self.project) + ) + with ZippedReportFolder( + result_filepath.full_path() + ) as reps_tmp_dir: + for rep in range(0, 1): + for prj_command in perf_prec_workload_commands( + project=self.project, binary=binary + ): + hot_function_report_file = Path(reps_tmp_dir) / ( + "hot-func-trace_" + f"{prj_command.command.label}_{rep}" + ".csv" + ) + + unique_tracefile_tag = \ + f"xray_{prj_command.command.label}_{rep}." 
+ with local.env( + XRAY_OPTIONS=" ".join([ + "patch_premain=true", + "xray_mode=xray-basic", + f"xray_logfile_base={unique_tracefile_tag}" + ]) + ): + with cleanup(prj_command): + pb_cmd = prj_command.command.as_plumbum( + project=self.project + ) + pb_cmd(retcode=binary.valid_exit_codes) + + for f in Path(".").iterdir(): + if f.name.startswith(unique_tracefile_tag): + xray_log_path = f.absolute() + break + + instr_map_path = local.path( + self.project.primary_source + ) / binary.path + + llvm_xray( + "account", f"{xray_log_path}", + "--deduce-sibling-calls", + f"--instr_map={instr_map_path}", + f"--output={hot_function_report_file}", + "--format=csv", + f"--top={HotFunctionReport.MAX_TRACK_FUNCTIONS}" + ) + + return actions.StepResult.OK + + +class XRayFindHotFunctions(FeatureExperiment, shorthand="HF"): + """Experiment for finding hot functions in code.""" + + NAME = "DetermineHotFunctions" + REPORT_SPEC = ReportSpecification(WLHotFunctionAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + project.cflags += [ + "-fxray-instrument", + "-fxray-instruction-threshold=1", + ] + + project.runtime_extension = run.RuntimeExtension(project, self) \ + << time.RunWithTime() + + project.compiler_extension = compiler.RunCompiler(project, self) + + project.compile = get_default_compile_error_wrapped( + self.get_handle(), project, + self.get_handle().report_spec().main_report + ) + + binary = select_project_binaries(project)[0] + if binary.type != BinaryType.EXECUTABLE: + raise AssertionError("Experiment only works with executables.") + + return [ + actions.Compile(project), + RunXRayProfiler(project, self.get_handle()), + actions.Clean(project), + ] diff --git a/varats/varats/projects/c_projects/xz.py b/varats/varats/projects/c_projects/xz.py index 3d1a580ed..9f0f1e73e 100644 --- a/varats/varats/projects/c_projects/xz.py +++ b/varats/varats/projects/c_projects/xz.py @@ -91,7 +91,7 @@ class Xz(VProject): # Use 
output_param to ensure input file # gets appended after all arguments. output_param=["{output}"], - output=SourceRoot("geo-maps/countries-land-250m.geo.json"), + output=SourceRoot("geo-maps/countries-land-1km.geo.json"), label="countries-land-1km", creates=["geo-maps/countries-land-1km.geo.json.xz"] ) diff --git a/varats/varats/tables/hot_functions.py b/varats/varats/tables/hot_functions.py new file mode 100644 index 000000000..ae98b2c88 --- /dev/null +++ b/varats/varats/tables/hot_functions.py @@ -0,0 +1,94 @@ +"""Module for the HotFunctionsTable.""" +import typing as tp + +import pandas as pd + +from varats.experiments.vara.hot_function_experiment import XRayFindHotFunctions +from varats.paper.paper_config import get_loaded_paper_config +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.report.hot_functions_report import WLHotFunctionAggregate +from varats.revision.revisions import get_processed_revisions_files +from varats.table.table import Table +from varats.table.table_utils import dataframe_to_table +from varats.table.tables import TableFormat, TableGenerator + + +class HotFunctionsTable(Table, table_name="hot_functions"): + + def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: + case_studies = get_loaded_paper_config().get_all_case_studies() + + df = pd.DataFrame() + + for case_study in case_studies: + project_name = case_study.project_name + + experiment_type = XRayFindHotFunctions + report_files = get_processed_revisions_files( + project_name, experiment_type, WLHotFunctionAggregate, + get_case_study_file_name_filter(case_study) + ) + + for report_filepath in report_files: + agg_hot_functions_report = WLHotFunctionAggregate( + report_filepath.full_path() + ) + report_file = agg_hot_functions_report.filename + + hot_funcs = agg_hot_functions_report.hot_functions_per_workload( + threshold=2 + ) + + entries = [] + for workload_name in agg_hot_functions_report.workload_names(): + hot_func_data = 
hot_funcs[workload_name] + for hf in hot_func_data: + new_row = { + "Project": + project_name, + "Binary": + report_file.binary_name, + "Revision": + str(report_file.commit_hash), + "Workload": + workload_name, + "FunctionName": + hf.name, + "TimeSpent": + hf.sum_time, + "Reps": + len( + agg_hot_functions_report. + reports(workload_name) + ) + } + + # df = df.append(new_row, ignore_index=True) + entries.append(pd.DataFrame([new_row])) + + df = pd.concat(entries, ignore_index=True) + + df.sort_values(["Project", "Binary"], inplace=True) + df.set_index( + ["Project", "Binary"], + inplace=True, + ) + + kwargs: tp.Dict[str, tp.Any] = {} + + return dataframe_to_table( + df, + table_format, + wrap_table=wrap_table, + wrap_landscape=True, + **kwargs + ) + + +class HotFunctionsTableGenerator( + TableGenerator, generator_name="hot-functions", options=[] +): + """Generator for `HotFunctionsTable`.""" + + def generate(self) -> tp.List[Table]: + return [HotFunctionsTable(self.table_config, **self.table_kwargs)] diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py index 4999b9f48..96da24ac0 100644 --- a/varats/varats/tools/bb_config.py +++ b/varats/varats/tools/bb_config.py @@ -122,6 +122,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None: 'varats.experiments.vara.marker_tester', 'varats.experiments.vara.phasar_fta', 'varats.experiments.vara.feature_region_verifier_experiment', + 'varats.experiments.vara.hot_function_experiment', ] From 4d354cd3825afbe0a0d62f41f8b30226335d2fc6 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 16 Aug 2024 12:20:26 +0200 Subject: [PATCH 2/6] Make xray a local import --- varats/varats/experiments/vara/hot_function_experiment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/varats/varats/experiments/vara/hot_function_experiment.py b/varats/varats/experiments/vara/hot_function_experiment.py index 653917ddc..0e3a9513c 100644 --- 
a/varats/varats/experiments/vara/hot_function_experiment.py +++ b/varats/varats/experiments/vara/hot_function_experiment.py @@ -5,7 +5,6 @@ from benchbuild.extensions import compiler, run, time from benchbuild.utils import actions from plumbum import local -from plumbum.cmd import llvm_xray from varats.experiment.experiment_util import ( ZippedReportFolder, @@ -76,6 +75,8 @@ def __str__(self, indent: int = 0) -> str: ) def run_instrumented_code(self) -> actions.StepResult: + from plumbum.cmd import llvm_xray + for binary in self.project.binaries: if binary.type != BinaryType.EXECUTABLE: # Skip libraries as we cannot run them From ee31f675e8b1097f637672ab689f94c76bb1ea42 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Fri, 16 Aug 2024 12:31:25 +0200 Subject: [PATCH 3/6] Adds missing docs --- varats/varats/experiments/vara/hot_function_experiment.py | 3 +++ varats/varats/tables/hot_functions.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/varats/varats/experiments/vara/hot_function_experiment.py b/varats/varats/experiments/vara/hot_function_experiment.py index 0e3a9513c..62a547d5c 100644 --- a/varats/varats/experiments/vara/hot_function_experiment.py +++ b/varats/varats/experiments/vara/hot_function_experiment.py @@ -1,3 +1,4 @@ +"""Experiment that detects the hot functions of a project.""" import typing as tp from pathlib import Path @@ -75,6 +76,8 @@ def __str__(self, indent: int = 0) -> str: ) def run_instrumented_code(self) -> actions.StepResult: + """Run the instrumented code to detect hot functions.""" + # pylint: disable=import-outside-toplevel from plumbum.cmd import llvm_xray for binary in self.project.binaries: diff --git a/varats/varats/tables/hot_functions.py b/varats/varats/tables/hot_functions.py index ae98b2c88..85ec2da17 100644 --- a/varats/varats/tables/hot_functions.py +++ b/varats/varats/tables/hot_functions.py @@ -14,6 +14,8 @@ class HotFunctionsTable(Table, table_name="hot_functions"): + """A concise table that provides a quick 
overview of all the detected hot + functions.""" def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: case_studies = get_loaded_paper_config().get_all_case_studies() From 881be9eba575a02d64e56acd6bb4d984d46b17df Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 19 Aug 2024 21:58:45 +0200 Subject: [PATCH 4/6] Update varats-core/varats/report/hot_functions_report.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sebastian Böhm --- varats-core/varats/report/hot_functions_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/varats-core/varats/report/hot_functions_report.py b/varats-core/varats/report/hot_functions_report.py index 4743dc394..208517a50 100644 --- a/varats-core/varats/report/hot_functions_report.py +++ b/varats-core/varats/report/hot_functions_report.py @@ -49,7 +49,7 @@ def hot_functions(self, threshold=2) -> tp.List[XRayFunctionWrapper]: ) # The total time tracked only includes time spend in the top n # (MAX_TRACK_FUNCTIONS) functions - total_time_tracked = self.__function_data["sum"].sum() + total_time_tracked = self.__function_data["sum"].max() if threshold == 0: sum_time_cutoff = 0 From 300a313588f4c3d13ce3f38d28197ecd3373994e Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Mon, 19 Aug 2024 22:02:59 +0200 Subject: [PATCH 5/6] Rewords docs --- varats-core/varats/report/hot_functions_report.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/varats-core/varats/report/hot_functions_report.py b/varats-core/varats/report/hot_functions_report.py index 208517a50..19ddb7e2b 100644 --- a/varats-core/varats/report/hot_functions_report.py +++ b/varats-core/varats/report/hot_functions_report.py @@ -36,7 +36,8 @@ def top_n_functions(self, limit=10) -> tp.List[XRayFunctionWrapper]: def hot_functions(self, threshold=2) -> tp.List[XRayFunctionWrapper]: """ Args: - threshold: min percentage a function needs as self time to count as 
hot + threshold: min percentage a function needs as total + time to count as hot """ if threshold < 0 or threshold > 100: raise ValueError( @@ -87,7 +88,8 @@ def hot_functions_per_workload( ) -> tp.Dict[str, tp.List[XRayFunctionWrapper]]: """ Args: - threshold: min percentage a function needs as self time to count as hot + threshold: min percentage a function needs as + total time to count as hot """ res: tp.Dict[str, tp.List[XRayFunctionWrapper]] = {} for wl_name in self.workload_names(): From 307b16aae5689cf1f1e450cc25ada8f040e45a12 Mon Sep 17 00:00:00 2001 From: Florian Sattler Date: Tue, 20 Aug 2024 20:09:08 +0200 Subject: [PATCH 6/6] Fixes type errors --- varats-core/varats/report/hot_functions_report.py | 10 +++++++--- .../varats/experiments/vara/hot_function_experiment.py | 10 ++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/varats-core/varats/report/hot_functions_report.py b/varats-core/varats/report/hot_functions_report.py index 19ddb7e2b..8b6aae2d1 100644 --- a/varats-core/varats/report/hot_functions_report.py +++ b/varats-core/varats/report/hot_functions_report.py @@ -16,6 +16,7 @@ class XRayFunctionWrapper: class HotFunctionReport(BaseReport, shorthand="HFR", file_type=".csv"): + """Report class to load and evaluate the hot function data.""" MAX_TRACK_FUNCTIONS = 50 @@ -23,7 +24,9 @@ def __init__(self, path: Path) -> None: super().__init__(path) self.__function_data = read_csv(path) - def top_n_functions(self, limit=10) -> tp.List[XRayFunctionWrapper]: + def top_n_functions(self, limit: int = 10) -> tp.List[XRayFunctionWrapper]: + """Determines the `n` hottest functions in which the most time was + spent.""" self.__function_data.sort_values( by='sum', ascending=False, inplace=True ) @@ -33,7 +36,7 @@ def top_n_functions(self, limit=10) -> tp.List[XRayFunctionWrapper]: ) for _, row in self.__function_data.head(limit).iterrows() ] - def hot_functions(self, threshold=2) -> tp.List[XRayFunctionWrapper]: + def hot_functions(self, 
threshold: int = 2) -> tp.List[XRayFunctionWrapper]: """ Args: threshold: min percentage a function needs as total @@ -79,12 +82,13 @@ def __init__(self, path: Path) -> None: super().__init__(path, HotFunctionReport) def dump_all_reports(self) -> None: + """Dumps the contents of all loaded hot functions reports.""" for wl_name in self.workload_names(): for report in self.reports(wl_name): report.print_full_dump() def hot_functions_per_workload( - self, threshold=2 + self, threshold: int = 2 ) -> tp.Dict[str, tp.List[XRayFunctionWrapper]]: """ Args: diff --git a/varats/varats/experiments/vara/hot_function_experiment.py b/varats/varats/experiments/vara/hot_function_experiment.py index 62a547d5c..e17cee9d0 100644 --- a/varats/varats/experiments/vara/hot_function_experiment.py +++ b/varats/varats/experiments/vara/hot_function_experiment.py @@ -50,7 +50,7 @@ def perf_prec_workload_commands( return wl_commands -class RunXRayProfiler(actions.ProjectStep): +class RunXRayProfiler(actions.ProjectStep): # type: ignore """Profiling step that runs a XRay instrumented binary to extract function- level measurement data.""" @@ -70,9 +70,11 @@ def __call__(self) -> actions.StepResult: return self.run_instrumented_code() def __str__(self, indent: int = 0) -> str: - return actions.textwrap.indent( - f"* {self.project.name}: Run VaRA measurements together with XRay", - indent * " " + return str( + actions.textwrap.indent( + f"* {self.project.name}: Run VaRA " + "measurements together with XRay", indent * " " + ) ) def run_instrumented_code(self) -> actions.StepResult: