From 756ee48651f042d2e110aa68263105e4c5ac1207 Mon Sep 17 00:00:00 2001
From: Leonie von Mann <s8levonm@stud.uni-saarland.de>
Date: Fri, 2 Feb 2024 16:01:41 +0100
Subject: [PATCH] Add experiment CompareASTBlame

---
 .../varats/data/reports/blame_annotations.py  | 152 +++++++++++
 .../experiments/vara/blame_ast_experiment.py  | 239 ++++++++++++++++++
 varats/varats/tools/bb_config.py              |   1 +
 3 files changed, 392 insertions(+)
 create mode 100644 varats/varats/data/reports/blame_annotations.py
 create mode 100644 varats/varats/experiments/vara/blame_ast_experiment.py

diff --git a/varats/varats/data/reports/blame_annotations.py b/varats/varats/data/reports/blame_annotations.py
new file mode 100644
index 000000000..c50d4e6ed
--- /dev/null
+++ b/varats/varats/data/reports/blame_annotations.py
@@ -0,0 +1,152 @@
+"""Module for BlameAnnotations, mapping instructions to blame information."""
+import typing as tp
+from pathlib import Path
+
+import yaml
+
+from varats.base.version_header import VersionHeader
+from varats.report.report import BaseReport
+
+
+class BlameInstruction():
+    """Pairs the debug-info blame with the VaRA blame of one instruction."""
+
+    def __init__(self, dbghash: str, varahash: str, name: str = "") -> None:
+        self.__dbghash = dbghash
+        self.__varahash = varahash
+        self.__name = name  # defaults to "" so the `name` property never fails
+
+    @staticmethod
+    def create_blame_instruction(
+        raw_entry: tp.Dict[str, tp.Any]
+    ) -> 'BlameInstruction':
+        """Creates a :class:`BlameInstruction` from a yaml document section."""
+        dbghash = str(raw_entry['dbghash'])
+        varahash = str(raw_entry['varahash'])
+        return BlameInstruction(dbghash, varahash)  # TODO: pass a name
+
+    @property
+    def name(self) -> str:
+        """Name of the instruction; empty unless one was passed in."""
+        return self.__name
+
+    @property
+    def dbghash(self) -> str:
+        """Blame based on debug information."""
+        return self.__dbghash
+
+    @property
+    def varahash(self) -> str:
+        """Blame based on IRegion."""
+        return self.__varahash
+
+
+class BlameAnnotations(BaseReport, shorthand="BA", file_type="yaml"):
+    """Report that holds, per annotated instruction, its debug blame and its
+    VaRA blame."""
+
+    def __init__(self, path: Path) -> None:
+        """Reads the version header and the 'annotations' section of the yaml
+        file at ``path``."""
+        super().__init__(path)
+        self.__blame_annotations: tp.Dict[tp.Any, BlameInstruction] = {}
+
+        # NOTE(review): yaml.CLoader is not a safe loader, so ``path`` should
+        # only ever point to report files written by our own tooling.
+        with open(path, 'r') as stream:
+            documents = yaml.load_all(stream, Loader=yaml.CLoader)
+            header = VersionHeader(next(documents))
+            header.raise_if_not_type("BlameAnnotations")
+
+            # Second yaml document: maps an instruction key to its blame data.
+            create = BlameInstruction.create_blame_instruction
+            for inst_key, raw_entry in next(documents)['annotations'].items():
+                self.__blame_annotations[inst_key] = create(raw_entry)
+
+    @property
+    def blame_annotations(self) -> tp.ValuesView[BlameInstruction]:
+        """Iterate over all blame annotations."""
+        return self.__blame_annotations.values()
+
+
+class ASTBlameReport(BaseReport, shorthand="BAST", file_type="yaml"):
+    """Report with the counts of equal and differing blames, once for debug vs.
+    AST-based blame and once for line-based vs. AST-based blame."""
+
+    def __init__(self, path: Path) -> None:
+        super().__init__(path)
+        # All counters start at zero; the file at ``path`` is not parsed here.
+        self.__eq_dbg_ast = 0
+        self.__diff_dbg_ast = 0
+        self.__eq_line_ast = 0
+        self.__diff_line_ast = 0
+
+    def print_yaml(self) -> None:
+        """Writes the four counters as a nested yaml mapping to the file at
+        ``self.path``."""
+        dbg_counts = {'diff': self.__diff_dbg_ast, 'equal': self.__eq_dbg_ast}
+        line_counts = {
+            'diff': self.__diff_line_ast,
+            'equal': self.__eq_line_ast
+        }
+        data = {'dbg vs ast': dbg_counts, 'line vs ast': line_counts}
+        with open(self.path, 'w') as yaml_file:
+            yaml.dump(data, yaml_file, default_flow_style=False)
+
+    @property
+    def diff_dbg_ast(self) -> int:
+        """Number of instructions whose debug and AST blame differ."""
+        return self.__diff_dbg_ast
+
+    @diff_dbg_ast.setter
+    def diff_dbg_ast(self, value: int) -> None:
+        self.__diff_dbg_ast = value
+
+    @property
+    def eq_dbg_ast(self) -> int:
+        """Number of instructions whose debug and AST blame are equal."""
+        return self.__eq_dbg_ast
+
+    @eq_dbg_ast.setter
+    def eq_dbg_ast(self, value: int) -> None:
+        self.__eq_dbg_ast = value
+
+    @property
+    def diff_line_ast(self) -> int:
+        """Number of instructions whose line and AST blame differ."""
+        return self.__diff_line_ast
+
+    @diff_line_ast.setter
+    def diff_line_ast(self, value: int) -> None:
+        self.__diff_line_ast = value
+
+    @property
+    def eq_line_ast(self) -> int:
+        """Number of instructions whose line and AST blame are equal."""
+        return self.__eq_line_ast
+
+    @eq_line_ast.setter
+    def eq_line_ast(self, value: int) -> None:
+        self.__eq_line_ast = value
+
+
+def compare_blame_annotations(
+    line_ba: BlameAnnotations, ast_ba: BlameAnnotations, path: Path
+) -> ASTBlameReport:
+    """Creates an :class:`ASTBlameReport` for ``path`` that counts how often
+    debug and VaRA blame agree within ``ast_ba``, and how often the VaRA
+    blames of ``line_ba`` and ``ast_ba`` agree."""
+    ast_report = ASTBlameReport(path)
+
+    ast_entries = list(ast_ba.blame_annotations)
+    dbg_equal = sum(e.dbghash == e.varahash for e in ast_entries)
+    ast_report.eq_dbg_ast += dbg_equal
+    ast_report.diff_dbg_ast += len(ast_entries) - dbg_equal
+
+    # zip() pairs the entries by position and stops at the shorter report.
+    pairs = list(zip(line_ba.blame_annotations, ast_entries))
+    line_equal = sum(lhs.varahash == rhs.varahash for lhs, rhs in pairs)
+    ast_report.eq_line_ast += line_equal
+    ast_report.diff_line_ast += len(pairs) - line_equal
+
+    return ast_report
diff --git a/varats/varats/experiments/vara/blame_ast_experiment.py b/varats/varats/experiments/vara/blame_ast_experiment.py
new file mode 100644
index 000000000..d8aa6837d
--- /dev/null
+++ b/varats/varats/experiments/vara/blame_ast_experiment.py
@@ -0,0 +1,239 @@
+"""
+Implements the blame AST experiment.
+
+The experiment compares AST based blame annotations to line based ones.
+"""
+
+import fnmatch
+import os
+import typing as tp
+
+from benchbuild import Project
+from benchbuild.utils import actions
+from benchbuild.utils.cmd import opt
+
+import varats.experiments.vara.blame_experiment as BE
+from varats.data.reports.blame_annotations import ASTBlameReport as BAST
+from varats.data.reports.blame_annotations import BlameAnnotations as BA
+from varats.data.reports.blame_annotations import compare_blame_annotations
+from varats.data.reports.blame_report import BlameReport as BR
+from varats.experiment.experiment_util import (
+    ExperimentHandle,
+    VersionExperiment,
+    create_default_analysis_failure_handler,
+    create_default_compiler_error_handler,
+    create_new_success_result_filepath,
+    exec_func_with_pe_error_handler,
+    get_varats_result_folder,
+    wrap_unlimit_stack_size,
+)
+from varats.experiment.wllvm import (
+    BCFileExtensions,
+    _create_default_bc_file_creation_actions,
+    get_cached_bc_file_path,
+)
+from varats.project.project_util import get_local_project_git_paths
+from varats.project.varats_project import VProject
+from varats.report.report import (
+    FileStatusExtension,
+    ReportFilename,
+    ReportFilepath,
+    ReportSpecification,
+)
+from varats.utils.git_util import ShortCommitHash
+
+
+class BlameAnnotationGeneration(actions.ProjectStep):  #type: ignore
+    """Generates one blame annotation report per binary of the project."""
+
+    NAME = "BlameAnnotationGeneration"
+    DESCRIPTION = "Generates report with debug and IInfo blame with -vara-BA of VaRA."
+
+    project: VProject
+
+    def __init__(
+        self, project: Project, experiment_handle: ExperimentHandle,
+        file_prefix: str
+    ):
+        super().__init__(project=project)
+        self.__file_prefix = file_prefix
+        self.__experiment_handle = experiment_handle
+
+    def __call__(self) -> actions.StepResult:
+        return self.analyze()
+
+    def analyze(self) -> actions.StepResult:
+        """
+        Runs ``opt`` with the VaRA blame flags on the cached bitcode file of
+        every binary, passing a separate report file path for each binary.
+
+        Flags used:
+            * -vara-BA: to generate the blame annotation report
+            * -vara-report-outfile=<path>: specify the path to store the results
+        """
+
+        for binary in self.project.binaries:
+            # The report file goes into the varats result folder. Its name is
+            # the experiment handle's file name for a successful run with the
+            # file prefix of this step put in front.
+            varats_result_folder = get_varats_result_folder(self.project)
+            report_name = self.__experiment_handle.get_file_name(
+                BAST.shorthand(),
+                project_name=str(self.project.name),
+                binary_name=binary.name,
+                project_revision=ShortCommitHash(
+                    self.project.version_of_primary
+                ),
+                project_uuid=str(self.project.run_uuid),
+                extension_type=FileStatusExtension.SUCCESS,
+                config_id=None
+            ).filename
+            result_filepath = ReportFilepath(
+                varats_result_folder,
+                ReportFilename(self.__file_prefix + report_name)
+            )
+
+            # One '<repo>:<path>' entry per local project git path.
+            git_mappings = ",".join(
+                f'{repo}:{path}' for repo, path in
+                get_local_project_git_paths(self.project.name).items()
+            )
+            bc_file = get_cached_bc_file_path(
+                self.project, binary, [
+                    BCFileExtensions.NO_OPT, BCFileExtensions.TBAA,
+                    BCFileExtensions.BLAME
+                ]
+            )
+            opt_params = [
+                "--enable-new-pm=0", "-vara-BD", "-vara-BA",
+                "-vara-init-commits", "-vara-rewriteMD",
+                f"-vara-git-mappings={git_mappings}", "-vara-use-phasar",
+                f"-vara-report-outfile={result_filepath}", bc_file
+            ]
+
+            # Failures of the opt run are passed on to the default analysis
+            # failure handler of the experiment.
+            run_cmd = wrap_unlimit_stack_size(opt[opt_params])
+            exec_func_with_pe_error_handler(
+                run_cmd,
+                create_default_analysis_failure_handler(
+                    self.__experiment_handle, self.project, BAST
+                )
+            )
+
+        return actions.StepResult.OK
+
+
+class BlameASTComparison(actions.ProjectStep):  #type: ignore
+    """Compares AST based BlameAnnotation reports to line based ones."""
+
+    NAME = "BlameASTComparison"
+    DESCRIPTION = "Compares BlameAnnotation reports of AST based annotations to line based ones."
+
+    project: VProject
+
+    def __init__(self, project: Project, experiment_handle: ExperimentHandle):
+        super().__init__(project=project)
+        self.__experiment_handle = experiment_handle
+
+    def __call__(self) -> actions.StepResult:
+        return self.analyze()
+
+    def analyze(self) -> actions.StepResult:
+        """Writes one :class:`ASTBlameReport` per binary that compares the
+        'linereport*' and 'astreport*' files of the varats result folder.
+
+        Raises FileNotFoundError if one of the two reports does not exist.
+        """
+        from pathlib import Path  # local: Path is not imported by this module
+        for binary in self.project.binaries:
+            folder = get_varats_result_folder(self.project)
+            files = os.listdir(folder)
+            # NOTE(review): the lookup ignores ``binary``; with several
+            # binaries every comparison reads the same pair of reports.
+            annotations = {}
+            for prefix in ("linereport", "astreport"):
+                matches = fnmatch.filter(files, prefix + '*')
+                if not matches:
+                    raise FileNotFoundError(f"No {prefix} file in {folder}")
+                annotations[prefix] = BA(Path(folder) / matches[-1])
+
+            result_file = create_new_success_result_filepath(
+                self.__experiment_handle, BAST, self.project, binary
+            )
+            ast_report = compare_blame_annotations(
+                annotations["linereport"], annotations["astreport"],
+                result_file.full_path()
+            )
+            ast_report.print_yaml()
+
+        return actions.StepResult.OK
+
+
+class BlameASTExperiment(VersionExperiment, shorthand="BASTE"):
+    """Contrasts AST based blame annotations with line based ones."""
+
+    NAME = "CompareASTBlame"
+
+    REPORT_SPEC = ReportSpecification(BAST)
+
+    def actions_for_project(
+        self, project: VProject
+    ) -> tp.MutableSequence[actions.Step]:
+        """
+        Builds the ordered list of steps for one project: bitcode creation and
+        blame annotation report for the line based setup, the same again with
+        the AST based cflag added, then the comparison step and a clean up.
+
+        Args:
+            project: to analyze
+        """
+
+        # Try to build the project without optimizations to get more precise
+        # blame annotations. Note: this does not guarantee that a project is
+        # built without optimizations because the used build tool/script can
+        # still add optimization flags after the experiment specified cflags.
+        project.cflags += ["-O1", "-Xclang", "-disable-llvm-optzns", "-g"]
+        bc_file_extensions = [
+            BCFileExtensions.NO_OPT,
+            BCFileExtensions.TBAA,
+            BCFileExtensions.BLAME,
+        ]
+
+        def bc_creation_actions() -> tp.MutableSequence[actions.Step]:
+            """Returns new default bitcode file creation steps that use the
+            default compiler error handler for extraction errors."""
+            return _create_default_bc_file_creation_actions(
+                project,
+                bc_file_extensions,
+                extraction_error_handler=create_default_compiler_error_handler(
+                    self.get_handle(), project, self.REPORT_SPEC.main_report
+                )
+            )
+
+        BE.setup_basic_blame_experiment(self, project, BAST)
+
+        # Line based blame annotations: bitcode creation, then the report.
+        analysis_actions = bc_creation_actions()
+        analysis_actions.append(
+            BlameAnnotationGeneration(
+                project, self.get_handle(), "linereport-"
+            )
+        )
+
+        # AST based blame annotations: the same steps with the extra cflag.
+        # NOTE(review): cflags are extended while the step list is built, not
+        # between the steps - confirm that the first compile step does not
+        # already see "-fvara-ast-GB" when it is executed.
+        project.cflags += ["-fvara-ast-GB"]
+        analysis_actions.extend(bc_creation_actions())
+        analysis_actions.append(
+            BlameAnnotationGeneration(project, self.get_handle(), "astreport-")
+        )
+
+        # Compare both annotation reports and write the AST blame report.
+        analysis_actions.append(BlameASTComparison(project, self.get_handle()))
+
+        analysis_actions.append(actions.Clean(project))
+
+        return analysis_actions
diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py
index 4999b9f48..3bca864d0 100644
--- a/varats/varats/tools/bb_config.py
+++ b/varats/varats/tools/bb_config.py
@@ -109,6 +109,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None:
         'varats.experiments.szz.pydriller_szz_experiment',
         'varats.experiments.szz.szz_unleashed_experiment',
         'varats.experiments.vara.agg_region_interaction_perf_runner',
+        'varats.experiments.vara.blame_ast_experiment',
         'varats.experiments.vara.blame_report_experiment',
         'varats.experiments.vara.blame_verifier_experiment',
         'varats.experiments.vara.commit_report_experiment',