From 756ee48651f042d2e110aa68263105e4c5ac1207 Mon Sep 17 00:00:00 2001
From: Leonie von Mann
Date: Fri, 2 Feb 2024 16:01:41 +0100
Subject: [PATCH] Add experiment CompareASTBlame

---
 .../varats/data/reports/blame_annotations.py  | 187 +++++++++++
 .../experiments/vara/blame_ast_experiment.py  | 239 ++++++++++++++++++
 varats/varats/tools/bb_config.py              |   1 +
 3 files changed, 427 insertions(+)
 create mode 100644 varats/varats/data/reports/blame_annotations.py
 create mode 100644 varats/varats/experiments/vara/blame_ast_experiment.py

diff --git a/varats/varats/data/reports/blame_annotations.py b/varats/varats/data/reports/blame_annotations.py
new file mode 100644
index 000000000..c50d4e6ed
--- /dev/null
+++ b/varats/varats/data/reports/blame_annotations.py
@@ -0,0 +1,187 @@
+"""Module for BlameAnnotations, mapping instructions to blame information."""
+import typing as tp
+from pathlib import Path
+
+import yaml
+
+from varats.base.version_header import VersionHeader
+from varats.report.report import BaseReport
+
+
+class BlameInstruction():
+    """Collection of debug blame and VaRA blame."""
+
+    def __init__(self, dbghash: str, varahash: str, name: str = "") -> None:
+        # ``name`` is optional so that two-argument callers keep working.
+        self.__name = name
+        self.__dbghash = dbghash
+        self.__varahash = varahash
+
+    @staticmethod
+    def create_blame_instruction(
+        raw_entry: tp.Dict[str, tp.Any], name: str = ""
+    ) -> 'BlameInstruction':
+        """
+        Creates a :class:`BlameInstruction` from the corresponding yaml
+        document section.
+
+        Args:
+            raw_entry: mapping providing the keys ``dbghash`` and ``varahash``
+            name: name to store for the instruction
+
+        Returns:
+            the instruction, with both hashes converted to ``str``
+        """
+        dbghash = str(raw_entry['dbghash'])
+        varahash = str(raw_entry['varahash'])
+        return BlameInstruction(dbghash, varahash, name)
+
+    @property
+    def name(self) -> str:
+        """Name of the instruction (empty if none was given)."""
+        return self.__name
+
+    @property
+    def dbghash(self) -> str:
+        """Blame based on debug information."""
+        return self.__dbghash
+
+    @property
+    def varahash(self) -> str:
+        """Blame based on IRegion."""
+        return self.__varahash
+
+
+class BlameAnnotations(BaseReport, shorthand="BA", file_type="yaml"):
+    """Report containing debug blame and blame annotations."""
+
+    def __init__(self, path: Path) -> None:
+        super().__init__(path)
+        self.__blame_annotations: tp.Dict[tp.Any, BlameInstruction] = {}
+
+        with open(path, 'r') as stream:
+            # NOTE(review): CLoader is not one of PyYAML's safe loaders; only
+            # load reports that come from a trusted source.
+            documents = yaml.load_all(stream, Loader=yaml.CLoader)
+            # First document: version header, second: the report itself.
+            version_header = VersionHeader(next(documents))
+            version_header.raise_if_not_type("BlameAnnotations")
+
+            raw_blame_report = next(documents)
+
+            # The key of every annotation is kept as the instruction name.
+            for key, raw_entry in raw_blame_report['annotations'].items():
+                self.__blame_annotations[key] = (
+                    BlameInstruction.create_blame_instruction(
+                        raw_entry, str(key)
+                    )
+                )
+
+    @property
+    def blame_annotations(self) -> tp.ValuesView[BlameInstruction]:
+        """Iterate over all blame annotations."""
+        return self.__blame_annotations.values()
+
+
+class ASTBlameReport(BaseReport, shorthand="BAST", file_type="yaml"):
+    """
+    Report containing difference between AST-based and line-based blame.
+
+    All counters start at zero; the file at ``path`` is only written by
+    :meth:`print_yaml`, it is not parsed on construction.
+    """
+
+    def __init__(self, path: Path) -> None:
+        super().__init__(path)
+        self.__diff_dbg_ast = 0
+        self.__eq_dbg_ast = 0
+        self.__diff_line_ast = 0
+        self.__eq_line_ast = 0
+
+    def print_yaml(self) -> None:
+        """Write the four counters as yaml to the path of this report."""
+        data = {
+            'dbg vs ast': {
+                'diff': self.__diff_dbg_ast,
+                'equal': self.__eq_dbg_ast
+            },
+            'line vs ast': {
+                'diff': self.__diff_line_ast,
+                'equal': self.__eq_line_ast
+            }
+        }
+        with open(self.path, 'w') as yaml_file:
+            yaml.dump(data, yaml_file, default_flow_style=False)
+
+    @property
+    def diff_dbg_ast(self) -> int:
+        """Count of different instructions between debug and ast blame."""
+        return self.__diff_dbg_ast
+
+    @diff_dbg_ast.setter
+    def diff_dbg_ast(self, value: int) -> None:
+        self.__diff_dbg_ast = value
+
+    @property
+    def eq_dbg_ast(self) -> int:
+        """Count of equal instructions between debug and ast blame."""
+        return self.__eq_dbg_ast
+
+    @eq_dbg_ast.setter
+    def eq_dbg_ast(self, value: int) -> None:
+        self.__eq_dbg_ast = value
+
+    @property
+    def diff_line_ast(self) -> int:
+        """Count of different instructions between line and ast blame."""
+        return self.__diff_line_ast
+
+    @diff_line_ast.setter
+    def diff_line_ast(self, value: int) -> None:
+        self.__diff_line_ast = value
+
+    @property
+    def eq_line_ast(self) -> int:
+        """Count of equal instructions between line and ast blame."""
+        return self.__eq_line_ast
+
+    @eq_line_ast.setter
+    def eq_line_ast(self, value: int) -> None:
+        self.__eq_line_ast = value
+
+
+def compare_blame_annotations(
+    line_ba: BlameAnnotations, ast_ba: BlameAnnotations, path: Path
+) -> ASTBlameReport:
+    """
+    Count equal and differing blame hashes of two annotation reports.
+
+    Args:
+        line_ba: report created with line based blame annotations
+        ast_ba: report created with AST based blame annotations
+        path: path handed to the resulting :class:`ASTBlameReport`
+
+    Returns:
+        a report holding the counters; nothing is written to disk here
+    """
+    ast_report = ASTBlameReport(path)
+
+    # Debug blame vs. VaRA blame within the AST based report.
+    for entry in ast_ba.blame_annotations:
+        if entry.dbghash == entry.varahash:
+            ast_report.eq_dbg_ast += 1
+        else:
+            ast_report.diff_dbg_ast += 1
+
+    # NOTE(review): entries are paired by position and ``zip`` stops at the
+    # shorter report -- presumably both reports list the same instructions
+    # in the same order; confirm.
+    for line_entry, ast_entry in zip(
+        line_ba.blame_annotations, ast_ba.blame_annotations
+    ):
+        if line_entry.varahash == ast_entry.varahash:
+            ast_report.eq_line_ast += 1
+        else:
+            ast_report.diff_line_ast += 1
+
+    return ast_report
diff --git a/varats/varats/experiments/vara/blame_ast_experiment.py b/varats/varats/experiments/vara/blame_ast_experiment.py
new file mode 100644
index 000000000..d8aa6837d
--- /dev/null
+++ b/varats/varats/experiments/vara/blame_ast_experiment.py
@@ -0,0 +1,239 @@
+"""
+Implements the blame AST experiment.
+
+The experiment compares AST based blame annotations to line based ones.
+"""
+
+import fnmatch
+import os
+import typing as tp
+
+from benchbuild import Project
+from benchbuild.utils import actions
+from benchbuild.utils.cmd import opt
+
+import varats.experiments.vara.blame_experiment as BE
+from varats.data.reports.blame_annotations import ASTBlameReport as BAST
+from varats.data.reports.blame_annotations import BlameAnnotations as BA
+from varats.data.reports.blame_annotations import compare_blame_annotations
+from varats.data.reports.blame_report import BlameReport as BR
+from varats.experiment.experiment_util import (
+    ExperimentHandle,
+    VersionExperiment,
+    create_default_analysis_failure_handler,
+    create_default_compiler_error_handler,
+    create_new_success_result_filepath,
+    exec_func_with_pe_error_handler,
+    get_varats_result_folder,
+    wrap_unlimit_stack_size,
+)
+from varats.experiment.wllvm import (
+    BCFileExtensions,
+    _create_default_bc_file_creation_actions,
+    get_cached_bc_file_path,
+)
+from varats.project.project_util import get_local_project_git_paths
+from varats.project.varats_project import VProject
+from varats.report.report import (
+    FileStatusExtension,
+    ReportFilename,
+    ReportFilepath,
+    ReportSpecification,
+)
+from varats.utils.git_util import ShortCommitHash
+
+
+class BlameAnnotationGeneration(actions.ProjectStep):  #type: ignore
+    """Generate blame annotation report."""
+
+    NAME = "BlameAnnotationGeneration"
+    DESCRIPTION = "Generates report with debug and IInfo blame with -vara-BA of VaRA."
+
+    project: VProject
+
+    def __init__(
+        self, project: Project, experiment_handle: ExperimentHandle,
+        file_prefix: str
+    ):
+        super().__init__(project=project)
+        self.__experiment_handle = experiment_handle
+        self.__file_prefix = file_prefix
+
+    def __call__(self) -> actions.StepResult:
+        return self.analyze()
+
+    def analyze(self) -> actions.StepResult:
+        """
+        This step performs the actual analysis with the correct command line
+        flags.
+
+        Flags used:
+            * -vara-BA: to generate the blame annotations
+            * -vara-report-outfile=<path>: specify the path to store the results
+        """
+
+        for binary in self.project.binaries:
+            # Add to the user-defined path for saving the results of the
+            # analysis also the name and the unique id of the project of every
+            # run.
+            varats_result_folder = get_varats_result_folder(self.project)
+            result_filepath = ReportFilepath(
+                varats_result_folder,
+                ReportFilename(
+                    self.__file_prefix + self.__experiment_handle.get_file_name(
+                        BAST.shorthand(),
+                        project_name=str(self.project.name),
+                        binary_name=binary.name,
+                        project_revision=ShortCommitHash(
+                            self.project.version_of_primary
+                        ),
+                        project_uuid=str(self.project.run_uuid),
+                        extension_type=FileStatusExtension.SUCCESS,
+                        config_id=None
+                    ).filename
+                )
+            )
+
+            opt_params = [
+                "--enable-new-pm=0", "-vara-BD", "-vara-BA",
+                "-vara-init-commits", "-vara-rewriteMD",
+                "-vara-git-mappings=" + ",".join([
+                    f'{repo}:{path}' for repo, path in
+                    get_local_project_git_paths(self.project.name).items()
+                ]), "-vara-use-phasar",
+                f"-vara-report-outfile={result_filepath}",
+                get_cached_bc_file_path(
+                    self.project, binary, [
+                        BCFileExtensions.NO_OPT, BCFileExtensions.TBAA,
+                        BCFileExtensions.BLAME
+                    ]
+                )
+            ]
+
+            run_cmd = wrap_unlimit_stack_size(opt[opt_params])
+
+            exec_func_with_pe_error_handler(
+                run_cmd,
+                create_default_analysis_failure_handler(
+                    self.__experiment_handle, self.project, BAST
+                )
+            )
+
+        return actions.StepResult.OK
+
+
+class BlameASTComparison(actions.ProjectStep):  #type: ignore
+    """Compare AST based BlameAnnotation reports to line based ones."""
+
+    NAME = "BlameASTComparison"
+    DESCRIPTION = "Compares BlameAnnotation reports of AST based annotations to line based ones."
+
+    project: VProject
+
+    def __init__(
+        self,
+        project: Project,
+        experiment_handle: ExperimentHandle,
+    ):
+        super().__init__(project=project)
+        self.__experiment_handle = experiment_handle
+
+    def __call__(self) -> actions.StepResult:
+        return self.analyze()
+
+    def analyze(self) -> actions.StepResult:
+        """Compare both annotation reports and store the result per binary."""
+        for binary in self.project.binaries:
+            varats_result_folder = get_varats_result_folder(self.project)
+
+            line_filepath = ast_filepath = None
+            for file in os.listdir(varats_result_folder):
+                if fnmatch.fnmatch(file, "linereport" + '*'):
+                    line_filepath = os.path.join(varats_result_folder, file)
+                if fnmatch.fnmatch(file, "astreport" + '*'):
+                    ast_filepath = os.path.join(varats_result_folder, file)
+
+            # Fail the step instead of hitting an unbound local below.
+            if line_filepath is None or ast_filepath is None:
+                return actions.StepResult.ERROR
+
+            result_file = create_new_success_result_filepath(
+                self.__experiment_handle, BAST, self.project, binary
+            )
+            ast_report = compare_blame_annotations(
+                BA(line_filepath), BA(ast_filepath), result_file.full_path()
+            )
+            ast_report.print_yaml()
+
+        return actions.StepResult.OK
+
+
+class BlameASTExperiment(VersionExperiment, shorthand="BASTE"):
+    """Compares AST based blame annotations to line based ones."""
+
+    NAME = "CompareASTBlame"
+
+    REPORT_SPEC = ReportSpecification(BAST)
+
+    def actions_for_project(
+        self, project: VProject
+    ) -> tp.MutableSequence[actions.Step]:
+        """
+        Returns the specified steps to run the project(s) specified in the call
+        in a fixed order.
+
+        Args:
+            project: to analyze
+        """
+        # Try to build the project without optimizations to get more precise
+        # blame annotations. Note: this does not guarantee that a project is
+        # built without optimizations because the used build tool/script can
+        # still add optimization flags after the experiment specified cflags.
+        project.cflags += ["-O1", "-Xclang", "-disable-llvm-optzns", "-g"]
+        bc_file_extensions = [
+            BCFileExtensions.NO_OPT,
+            BCFileExtensions.TBAA,
+            BCFileExtensions.BLAME,
+        ]
+
+        BE.setup_basic_blame_experiment(self, project, BAST)
+        # Compile with line based blame annotations
+        analysis_actions = _create_default_bc_file_creation_actions(
+            project,
+            bc_file_extensions,
+            extraction_error_handler=create_default_compiler_error_handler(
+                self.get_handle(), project, self.REPORT_SPEC.main_report
+            )
+        )
+        # Generate blame annotation report
+        analysis_actions.append(
+            BlameAnnotationGeneration(
+                project, self.get_handle(), "linereport-"
+            )
+        )
+
+        # Compile with AST based blame annotations.
+        # NOTE(review): this flag is appended while the step list is built;
+        # confirm that the line based build above does not pick it up too.
+        project.cflags += ["-fvara-ast-GB"]
+        analysis_actions.extend(
+            _create_default_bc_file_creation_actions(
+                project,
+                bc_file_extensions,
+                extraction_error_handler=create_default_compiler_error_handler(
+                    self.get_handle(), project, self.REPORT_SPEC.main_report
+                )
+            )
+        )
+
+        # Generate blame annotation report
+        analysis_actions.append(
+            BlameAnnotationGeneration(project, self.get_handle(), "astreport-")
+        )
+
+        # Generate AST blame report (comparison)
+        analysis_actions.append(BlameASTComparison(project, self.get_handle()))
+
+        analysis_actions.append(actions.Clean(project))
+
+        return analysis_actions
diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py
index 4999b9f48..3bca864d0 100644
--- a/varats/varats/tools/bb_config.py
+++ b/varats/varats/tools/bb_config.py
@@ -109,6 +109,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None:
         'varats.experiments.szz.pydriller_szz_experiment',
         'varats.experiments.szz.szz_unleashed_experiment',
         'varats.experiments.vara.agg_region_interaction_perf_runner',
+        'varats.experiments.vara.blame_ast_experiment',
         'varats.experiments.vara.blame_report_experiment',
         'varats.experiments.vara.blame_verifier_experiment',
         'varats.experiments.vara.commit_report_experiment',