Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements experiment to detect the hot functions of a project #884

Merged
merged 9 commits into from
Aug 21, 2024
104 changes: 104 additions & 0 deletions varats-core/varats/report/hot_functions_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import typing as tp
from dataclasses import dataclass
from pathlib import Path

from pandas import read_csv

from varats.experiment.workload_util import WorkloadSpecificReportAggregate
from varats.report.report import BaseReport, ReportAggregate


@dataclass
class XRayFunctionWrapper:
    """Plain record describing the measurements of a single function."""

    # Name of the measured function.
    name: str
    # Value of the report's `count` column for this function.
    count: int
    # Value of the report's `sum` column for this function.
    sum_time: float


class HotFunctionReport(BaseReport, shorthand="HFR", file_type=".csv"):
    """Report that loads per-function timing data from a CSV file and offers
    queries for the functions in which the most time was spent."""

    # Number of functions the profiling step asks `llvm-xray account` to keep
    # (passed as `--top`), i.e., the maximum number of rows in one report.
    MAX_TRACK_FUNCTIONS = 50

    def __init__(self, path: Path) -> None:
        super().__init__(path)
        self.__function_data = read_csv(path)

    @staticmethod
    def _wrap_row(row: tp.Any) -> XRayFunctionWrapper:
        """Converts one row of the loaded table into a function wrapper."""
        return XRayFunctionWrapper(
            name=row["function"], count=row['count'], sum_time=row["sum"]
        )

    def top_n_functions(self, limit: int = 10) -> tp.List[XRayFunctionWrapper]:
        """
        Determines the hottest functions, ranked by the `sum` column.

        Note: the loaded table is re-sorted in place as a side effect.

        Args:
            limit: maximum number of functions to return

        Returns:
            up to `limit` functions, hottest first
        """
        self.__function_data.sort_values(
            by='sum', ascending=False, inplace=True
        )
        hottest_rows = self.__function_data.head(limit)
        return [self._wrap_row(entry) for _, entry in hottest_rows.iterrows()]

    def hot_functions(self, threshold: int = 2) -> tp.List[XRayFunctionWrapper]:
        """
        Determines all functions whose time exceeds a share of the reference
        time.

        Note: the loaded table is re-sorted in place as a side effect.

        Args:
            threshold: min percentage a function needs as total
                       time to count as hot

        Returns:
            all functions strictly above the cutoff, hottest first

        Raises:
            ValueError: if `threshold` lies outside of [0, 100]
        """
        if threshold > 100 or threshold < 0:
            raise ValueError(
                "Threshold value needs to be in the range [0,...,100] "
                f"but was {threshold}"
            )

        self.__function_data.sort_values(
            by='sum', ascending=False, inplace=True
        )

        # Only the top n (MAX_TRACK_FUNCTIONS) functions are part of the
        # report, so the reference time is derived from the tracked rows only.
        # NOTE(review): the largest single `sum` value is used as reference,
        # presumably because the outermost function's time is inclusive of
        # its callees -- confirm against the llvm-xray account semantics.
        total_time_tracked = self.__function_data["sum"].max()

        sum_time_cutoff = (
            0 if threshold == 0 else (total_time_tracked * threshold) / 100
        )

        return [
            self._wrap_row(entry)
            for _, entry in self.__function_data.iterrows()
            if entry["sum"] > sum_time_cutoff
        ]

    def print_full_dump(self) -> None:
        """Prints the complete loaded table to stdout."""
        print(f"{self.__function_data}")


class WLHotFunctionAggregate(
    WorkloadSpecificReportAggregate[HotFunctionReport],
    shorthand="WL" + HotFunctionReport.SHORTHAND + ReportAggregate.SHORTHAND,
    file_type=ReportAggregate.FILE_TYPE
):
    """Aggregate that groups hot function reports by workload."""

    def __init__(self, path: Path) -> None:
        super().__init__(path, HotFunctionReport)

    def dump_all_reports(self) -> None:
        """Prints the full contents of every loaded hot function report,
        workload by workload."""
        for workload in self.workload_names():
            for hf_report in self.reports(workload):
                hf_report.print_full_dump()

    def hot_functions_per_workload(
        self, threshold: int = 2
    ) -> tp.Dict[str, tp.List[XRayFunctionWrapper]]:
        """
        Collects the hot functions for each workload.

        Args:
            threshold: min percentage a function needs as
                       total time to count as hot

        Returns:
            mapping from workload name to its hot functions; workloads
            without any report do not get an entry
        """
        per_workload: tp.Dict[str, tp.List[XRayFunctionWrapper]] = {}
        for workload in self.workload_names():
            # TODO: repetition handling
            # Every report is evaluated, but each one replaces the previous
            # entry, so only the last report of a workload is kept.
            for hf_report in self.reports(workload):
                per_workload[workload] = hf_report.hot_functions(
                    threshold=threshold
                )

        return per_workload
181 changes: 181 additions & 0 deletions varats/varats/experiments/vara/hot_function_experiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""Experiment that detects the hot functions of a project."""
import typing as tp
from pathlib import Path

from benchbuild.command import ProjectCommand, cleanup
from benchbuild.extensions import compiler, run, time
from benchbuild.utils import actions
from plumbum import local

from varats.experiment.experiment_util import (
ZippedReportFolder,
create_new_success_result_filepath,
get_default_compile_error_wrapped,
ExperimentHandle,
)
from varats.experiment.workload_util import WorkloadCategory, workload_commands
from varats.experiments.vara.feature_experiment import FeatureExperiment
from varats.experiments.vara.feature_perf_precision import (
select_project_binaries,
)
from varats.project.project_util import BinaryType, ProjectBinaryWrapper
from varats.project.varats_project import VProject
from varats.report.hot_functions_report import (
HotFunctionReport,
WLHotFunctionAggregate,
)
from varats.report.report import ReportSpecification
from varats.utils.config import get_current_config_id


def perf_prec_workload_commands(
    project: VProject, binary: ProjectBinaryWrapper
) -> tp.List[ProjectCommand]:
    """
    Uniformly select the workloads that should be processed.

    Args:
        project: project whose workloads should be selected
        binary: binary the workload commands are created for

    Returns:
        the commands of the EXAMPLE (unless excluded), SMALL, and MEDIUM
        workload categories, in that order
    """
    # Example commands from these case studies are too "fast"
    skip_examples = (
        project.name.startswith("SynthIP") or
        project.name == "SynthSAFieldSensitivity"
    )

    categories = [] if skip_examples else [WorkloadCategory.EXAMPLE]
    categories += [WorkloadCategory.SMALL, WorkloadCategory.MEDIUM]

    selected: tp.List[ProjectCommand] = []
    for category in categories:
        selected.extend(workload_commands(project, binary, [category]))

    return selected


class RunXRayProfiler(actions.ProjectStep):  # type: ignore
    """Profiling step that runs a XRay instrumented binary to extract function-
    level measurement data."""

    NAME = "RunInstrumentedXRayBinaries"
    DESCRIPTION = (
        "Profile a project that was instrumented "
        "with xray instrumentations."
    )

    project: VProject

    def __init__(
        self, project: VProject, experiment_handle: ExperimentHandle
    ) -> None:
        super().__init__(project=project)
        self.__experiment_handle = experiment_handle

    def __call__(self) -> actions.StepResult:
        return self.run_instrumented_code()

    def __str__(self, indent: int = 0) -> str:
        return str(
            actions.textwrap.indent(
                f"* {self.project.name}: Run VaRA "
                "measurements together with XRay", indent * " "
            )
        )

    @staticmethod
    def _find_xray_log(tracefile_tag: str) -> Path:
        """
        Locates the XRay trace file of the most recent workload run.

        Args:
            tracefile_tag: unique file name prefix that was handed to XRay
                           via `xray_logfile_base`

        Returns:
            absolute path to the first file in the current working directory
            whose name starts with `tracefile_tag`

        Raises:
            FileNotFoundError: if no matching trace file exists
        """
        for candidate in Path(".").iterdir():
            if candidate.name.startswith(tracefile_tag):
                return candidate.absolute()

        raise FileNotFoundError(
            f"Could not find a XRay trace file with prefix '{tracefile_tag}' "
            f"in '{Path('.').absolute()}'."
        )

    def run_instrumented_code(self) -> actions.StepResult:
        """
        Run the instrumented code to detect hot functions.

        For every executable binary, each selected workload is run with XRay
        enabled and the produced trace is converted with `llvm-xray account`
        into one CSV file inside the zipped result folder.

        Returns:
            `StepResult.OK` after all workloads were profiled

        Raises:
            FileNotFoundError: if a workload run did not produce a trace file
        """
        # pylint: disable=import-outside-toplevel
        from plumbum.cmd import llvm_xray

        for binary in self.project.binaries:
            if binary.type != BinaryType.EXECUTABLE:
                # Skip libraries as we cannot run them
                continue

            with local.cwd(local.path(self.project.builddir)):

                result_filepath = create_new_success_result_filepath(
                    exp_handle=self.__experiment_handle,
                    report_type=self.__experiment_handle.report_spec().
                    main_report,
                    project=self.project,
                    binary=binary,
                    config_id=get_current_config_id(self.project)
                )
                with ZippedReportFolder(
                    result_filepath.full_path()
                ) as reps_tmp_dir:
                    # Currently, only a single repetition is measured.
                    for rep in range(0, 1):
                        for prj_command in perf_prec_workload_commands(
                            project=self.project, binary=binary
                        ):
                            hot_function_report_file = Path(reps_tmp_dir) / (
                                "hot-func-trace_"
                                f"{prj_command.command.label}_{rep}"
                                ".csv"
                            )

                            unique_tracefile_tag = \
                                f"xray_{prj_command.command.label}_{rep}."
                            with local.env(
                                XRAY_OPTIONS=" ".join([
                                    "patch_premain=true",
                                    "xray_mode=xray-basic",
                                    f"xray_logfile_base={unique_tracefile_tag}"
                                ])
                            ):
                                with cleanup(prj_command):
                                    pb_cmd = prj_command.command.as_plumbum(
                                        project=self.project
                                    )
                                    pb_cmd(retcode=binary.valid_exit_codes)

                            # Resolve the trace for exactly this workload; a
                            # missing trace must fail loudly instead of
                            # reusing the trace of a previous workload.
                            xray_log_path = self._find_xray_log(
                                unique_tracefile_tag
                            )

                            instr_map_path = local.path(
                                self.project.primary_source
                            ) / binary.path

                            llvm_xray(
                                "account", f"{xray_log_path}",
                                "--deduce-sibling-calls",
                                f"--instr_map={instr_map_path}",
                                f"--output={hot_function_report_file}",
                                "--format=csv",
                                f"--top={HotFunctionReport.MAX_TRACK_FUNCTIONS}"
                            )

        return actions.StepResult.OK


class XRayFindHotFunctions(FeatureExperiment, shorthand="HF"):
    """Experiment that compiles a project with XRay instrumentation and
    profiles it to find the hot functions in the code."""

    NAME = "DetermineHotFunctions"
    REPORT_SPEC = ReportSpecification(WLHotFunctionAggregate)

    def actions_for_project(
        self, project: VProject
    ) -> tp.MutableSequence[actions.Step]:
        """
        Configures the project for XRay instrumentation and returns the steps
        of this experiment.

        Args:
            project: project that should be profiled

        Returns:
            the compile, profile, and clean steps

        Raises:
            AssertionError: if the selected binary is not an executable
        """
        xray_flags = [
            "-fxray-instrument",
            "-fxray-instruction-threshold=1",
        ]
        project.cflags += xray_flags

        timed_runtime = run.RuntimeExtension(project, self) \
            << time.RunWithTime()
        project.runtime_extension = timed_runtime

        project.compiler_extension = compiler.RunCompiler(project, self)

        main_report = self.get_handle().report_spec().main_report
        project.compile = get_default_compile_error_wrapped(
            self.get_handle(), project, main_report
        )

        selected_binary = select_project_binaries(project)[0]
        if selected_binary.type != BinaryType.EXECUTABLE:
            raise AssertionError("Experiment only works with executables.")

        experiment_steps: tp.MutableSequence[actions.Step] = [
            actions.Compile(project),
            RunXRayProfiler(project, self.get_handle()),
            actions.Clean(project),
        ]
        return experiment_steps
96 changes: 96 additions & 0 deletions varats/varats/tables/hot_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Module for the HotFunctionsTable."""
import typing as tp

import pandas as pd

from varats.experiments.vara.hot_function_experiment import XRayFindHotFunctions
from varats.paper.paper_config import get_loaded_paper_config
from varats.paper_mgmt.case_study import get_case_study_file_name_filter
from varats.report.hot_functions_report import WLHotFunctionAggregate
from varats.revision.revisions import get_processed_revisions_files
from varats.table.table import Table
from varats.table.table_utils import dataframe_to_table
from varats.table.tables import TableFormat, TableGenerator


class HotFunctionsTable(Table, table_name="hot_functions"):
    """A concise table that provides a quick overview of all the detected hot
    functions."""

    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        """
        Renders one row per hot function for every processed report of every
        case study in the loaded paper config.

        Args:
            table_format: output format of the rendered table
            wrap_table: whether the rendered table should be wrapped

        Returns:
            the rendered table
        """
        columns = [
            "Project", "Binary", "Revision", "Workload", "FunctionName",
            "TimeSpent", "Reps"
        ]
        # Rows are accumulated over ALL case studies and report files, so
        # that no report overwrites the data of a previously processed one.
        rows: tp.List[tp.Dict[str, tp.Any]] = []

        for case_study in get_loaded_paper_config().get_all_case_studies():
            project_name = case_study.project_name

            report_files = get_processed_revisions_files(
                project_name, XRayFindHotFunctions, WLHotFunctionAggregate,
                get_case_study_file_name_filter(case_study)
            )

            for report_filepath in report_files:
                agg_hot_functions_report = WLHotFunctionAggregate(
                    report_filepath.full_path()
                )
                report_file = agg_hot_functions_report.filename

                hot_funcs = agg_hot_functions_report.hot_functions_per_workload(
                    threshold=2
                )

                # Only workloads that produced a result have an entry here.
                for workload_name, hot_func_data in hot_funcs.items():
                    num_reps = len(
                        agg_hot_functions_report.reports(workload_name)
                    )
                    for hf in hot_func_data:
                        rows.append({
                            "Project": project_name,
                            "Binary": report_file.binary_name,
                            "Revision": str(report_file.commit_hash),
                            "Workload": workload_name,
                            "FunctionName": hf.name,
                            "TimeSpent": hf.sum_time,
                            "Reps": num_reps,
                        })

        # Explicit columns keep sorting and indexing valid even when no hot
        # function was found at all.
        df = pd.DataFrame(rows, columns=columns)

        df.sort_values(["Project", "Binary"], inplace=True)
        df.set_index(
            ["Project", "Binary"],
            inplace=True,
        )

        return dataframe_to_table(
            df,
            table_format,
            wrap_table=wrap_table,
            wrap_landscape=True,
        )


class HotFunctionsTableGenerator(
    TableGenerator, generator_name="hot-functions", options=[]
):
    """Generator that produces the `HotFunctionsTable`."""

    def generate(self) -> tp.List[Table]:
        """Creates a single `HotFunctionsTable` from the generator's table
        config and keyword arguments."""
        hot_functions_table = HotFunctionsTable(
            self.table_config, **self.table_kwargs
        )
        return [hot_functions_table]
1 change: 1 addition & 0 deletions varats/varats/tools/bb_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None:
'varats.experiments.vara.marker_tester',
'varats.experiments.vara.phasar_fta',
'varats.experiments.vara.feature_region_verifier_experiment',
'varats.experiments.vara.hot_function_experiment',
]


Expand Down
Loading