diff --git a/tests/utils/test_bb_config.py b/tests/utils/test_bb_config.py index 692b5de39..e563b3e41 100644 --- a/tests/utils/test_bb_config.py +++ b/tests/utils/test_bb_config.py @@ -76,7 +76,8 @@ def test_if_experiments_were_added(self): "varats.experiments.discover_experiments", "varats.experiments.vara.region_instrumentation", "varats.experiments.vara.commit_annotation_report", - "varats.experiments.vara.blame_experiment" + "varats.experiments.vara.blame_experiment", + "varats.experiments.phasar.incremental_analysis" ] loaded_plugins = bb_cfg()["plugins"]["experiments"].value diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index 3ec66e1c4..27ab7d2a1 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -460,6 +460,9 @@ def __init__(self, result_report_path: Path) -> None: super().__init__() self.__result_report_name: Path = result_report_path.with_suffix('') + def __enter__(self) -> Path: + return Path(super().__enter__()) + def __exit__( self, exc_type: tp.Optional[tp.Type[BaseException]], exc_value: tp.Optional[BaseException], diff --git a/varats-core/varats/experiment/wllvm.py b/varats-core/varats/experiment/wllvm.py index 8e02517ef..1a958cb4d 100644 --- a/varats-core/varats/experiment/wllvm.py +++ b/varats-core/varats/experiment/wllvm.py @@ -26,6 +26,7 @@ PEErrorHandler, ) from varats.project.project_util import ProjectBinaryWrapper +from varats.utils.git_util import ShortCommitHash from varats.utils.settings import bb_cfg @@ -163,7 +164,9 @@ def extract(self) -> actions.StepResult: bc_cache_file = bc_cache_folder + self.get_bc_file_name( project_name=str(project.name), binary_name=str(binary.name), - project_version=project.version_of_primary, + project_version=str( + project.active_variant[project.primary_source].version + ), bc_file_extensions=self.bc_file_extensions ) @@ -174,7 +177,8 @@ def extract(self) -> actions.StepResult: def project_bc_files_in_cache( project: Project, - required_bc_file_extensions: tp.Optional[tp.List[BCFileExtensions]] + project_revision: ShortCommitHash, + required_bc_file_extensions: tp.Optional[tp.List[BCFileExtensions]], ) -> bool: """ Checks if all bc files, corresponding to the projects binaries, are in the @@ -182,6 +186,7 @@ def project_bc_files_in_cache( Args: project: the project + project_revision: specific revision that should be in cache required_bc_file_extensions: list of required file extensions Returns: True, if all BC files are present, False otherwise. 
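
Review note: for orientation while reading this file's hunks, here is a minimal sketch (not part of the patch) of how the revision-aware BC-cache API introduced just below is meant to be driven. `project` is assumed to be a `VProject` available inside a running experiment step, and the hash is hypothetical:

```python
from varats.experiment.wllvm import (
    BCFileExtensions,
    get_bc_cache_actions_for_revision,
    get_cached_bc_file_path,
)
from varats.utils.git_util import ShortCommitHash

revision = ShortCommitHash("0123456789")  # hypothetical revision

# schedule compile/extract actions only if this revision misses the cache
cache_actions = get_bc_cache_actions_for_revision(
    project, revision, [BCFileExtensions.NO_OPT]
)

# later, look up the cached BC file for the same revision
bc_file = get_cached_bc_file_path(
    project, project.binaries[0], [BCFileExtensions.NO_OPT], revision
)
```
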
@@ -197,7 +202,7 @@ def project_bc_files_in_cache( ) + Extract.get_bc_file_name( project_name=str(project.name), binary_name=binary.name, - project_version=project.version_of_primary, + project_version=str(project_revision), bc_file_extensions=required_bc_file_extensions ) ) @@ -240,7 +245,7 @@ def get_bc_cache_actions( extraction_error_handler: tp.Optional[PEErrorHandler] = None, bc_action_creator: tp.Callable[ [Project, tp.List[BCFileExtensions], tp.Optional[PEErrorHandler]], - tp.List[actions.Step]] = _create_default_bc_file_creation_actions + tp.List[actions.Step]] = _create_default_bc_file_creation_actions, ) -> tp.List[actions.Step]: """ Builds the action pipeline, if needed, to fill the BC file cache that @@ -255,8 +260,43 @@ def get_bc_cache_actions( Returns: required actions to populate the BC cache """ + return get_bc_cache_actions_for_revision( + project, None, bc_file_extensions, extraction_error_handler, + bc_action_creator + ) + + +def get_bc_cache_actions_for_revision( + project: Project, + project_revision: tp.Optional[ShortCommitHash] = None, + bc_file_extensions: tp.Optional[tp.List[BCFileExtensions]] = None, + extraction_error_handler: tp.Optional[PEErrorHandler] = None, + bc_action_creator: tp.Callable[ + [Project, tp.List[BCFileExtensions], tp.Optional[PEErrorHandler]], + tp.List[actions.Step]] = _create_default_bc_file_creation_actions, +) -> tp.List[actions.Step]: + """ + Builds the action pipeline, if needed, to fill the BC file cache that + provides BC files for the compiled binaries of a project. - if not project_bc_files_in_cache(project, bc_file_extensions): + Args: + project: the project to compile + project_revision: specific revision that should be compiled + bc_file_extensions: list of bc file extensions + extraction_error_handler: error handler to report errors during + the extraction step + bc_action_creator: alternative BC cache actions creation callback + + Returns: required actions to populate the BC cache + """ + if not project_revision: + project_revision = ShortCommitHash( + project.active_variant[project.primary_source].version + ) + + if not project_bc_files_in_cache( + project, project_revision, bc_file_extensions + ): return bc_action_creator( project, bc_file_extensions if bc_file_extensions else [], extraction_error_handler @@ -269,6 +309,7 @@ def get_cached_bc_file_path( project: Project, binary: ProjectBinaryWrapper, required_bc_file_extensions: tp.Optional[tp.List[BCFileExtensions]] = None, + project_revision: tp.Optional[ShortCommitHash] = None ) -> Path: """ Look up the path to a BC file from the BC cache. @@ -287,15 +328,19 @@ def get_cached_bc_file_path( ) ) + if not project_revision: + project_revision = project.version_of_primary + bc_file_path = bc_cache_folder / Extract.get_bc_file_name( project_name=project.name, binary_name=binary.name, - project_version=project.version_of_primary, + project_version=str(project_revision), bc_file_extensions=required_bc_file_extensions ) if not bc_file_path.exists(): raise LookupError( - "No corresponding BC file found in cache. Project was probably not" + f"No corresponding BC file ({bc_file_path.stem}) found in cache." + " Project was probably not" " compiled with the correct compile/extract action." 
        )
    return Path(bc_file_path)
diff --git a/varats-core/varats/project/varats_project.py b/varats-core/varats/project/varats_project.py
index 27805b03a..17e5ce21d 100644
--- a/varats-core/varats/project/varats_project.py
+++ b/varats-core/varats/project/varats_project.py
@@ -24,7 +24,9 @@ class VProject(bb.Project):  # type: ignore
     def binaries(self) -> tp.List[ProjectBinaryWrapper]:
         """Return a list of binaries generated by the project."""
         return self.binaries_for_revision(
-            ShortCommitHash(self.version_of_primary)
+            ShortCommitHash(
+                str(self.active_variant[self.primary_source].version)
+            )
         )
 
     @staticmethod
diff --git a/varats-core/varats/utils/util.py b/varats-core/varats/utils/util.py
index 003db268c..0862d5337 100644
--- a/varats-core/varats/utils/util.py
+++ b/varats-core/varats/utils/util.py
@@ -1,6 +1,7 @@
 """Small helper methods that implement common functionalities."""
 
 import typing as tp
+from itertools import tee
 
 FunctionType = tp.TypeVar("FunctionType")
 
@@ -22,3 +23,13 @@ def add_static_vars(func: FunctionType) -> FunctionType:
         return func
 
     return add_static_vars
+
+
+T = tp.TypeVar('T')
+
+
+# Forward port of itertools.pairwise
+def pairwise(iterable: tp.Iterable[T]) -> tp.Iterable[tp.Tuple[T, T]]:
+    first_iter, second_iter = tee(iterable)
+    next(second_iter, None)
+    return zip(first_iter, second_iter)
diff --git a/varats/varats/containers/containers.py b/varats/varats/containers/containers.py
index 91762faaa..5c62a367c 100644
--- a/varats/varats/containers/containers.py
+++ b/varats/varats/containers/containers.py
@@ -218,9 +218,11 @@ def from_source(
         if editable_install:
             pip_args.append("-e")
             _set_varats_source_mount(image_context, str(src_dir))
+
         mount = f'type=bind,src={src_dir},target={tgt_dir}'
         if buildah_version() >= (1, 24, 0):
             mount += ',rw'
+
         image.run(
             *pip_args,
             str(tgt_dir / 'varats-core'),
diff --git a/varats/varats/data/reports/incremental_reports.py b/varats/varats/data/reports/incremental_reports.py
new file mode 100644
index 000000000..94269aa2f
--- /dev/null
+++ b/varats/varats/data/reports/incremental_reports.py
@@ -0,0 +1,329 @@
+"""Report module for phasar incremental analysis reports."""
+import os
+import shutil
+import tempfile
+import typing as tp
+from enum import Enum
+from pathlib import Path
+from statistics import mean
+
+import yaml
+
+from varats.report.report import BaseReport, FileStatusExtension, ReportFilename
+
+
+class AnalysisType(Enum):
+
+    value: str
+
+    TYPE_STATE = "typestate"
+    TAINT = "taint"
+    LCA = "ide-lca"
+
+    @staticmethod
+    def convert_from(value: str) -> tp.List['AnalysisType']:
+        enabled_analysis_types = []
+        for analysis_type in AnalysisType:
+            if analysis_type.value in value:
+                enabled_analysis_types.append(analysis_type)
+
+        return enabled_analysis_types
+
+    def __str__(self) -> str:
+        return f"{self.value}"
+
+
+class IncrementalTimings:
+    """
+    Parses and aggregates the timing measurements of a phasar incremental
+    analysis run from a timings YAML file of the following shape:
+
+    ---
+    INC_INITIAL_PT_CONSTRUCTION_TIME: 1000
+    INC_INITIAL_ICFG_CONSTRUCTION_TIME: 1000
+    INC_INITIAL_IRDB_CONSTRUCTION_TIME: 1000
+    INC_INITIAL_TH_CONSTRUCTION_TIME: 1000
+    INC_INCREMENTAL_IRDB_CONSTRUCTION_TIME: 1000
+
+    WPA_PT_CONSTRUCTION_TIME: 1000
+    WPA_ICFG_CONSTRUCTION_TIME: 1000
+    WPA_TH_CONSTRUCTION_TIME: 1000
+    WPA_IRDB_CONSTRUCTION_TIME: 1000
+    ...
+
+    * WPA_IRDB_CONSTRUCTION_TIME
+    * WPA_TH_CONSTRUCTION_TIME
+    * WPA_PT_CONSTRUCTION_TIME
+    * WPA_ICFG_CONSTRUCTION_TIME
+    * WPA_DFA_TEST_SOLVING_TIME
+
+
+    * INC_INITIAL_IRDB_CONSTRUCTION_TIME
+    * INC_INITIAL_TH_CONSTRUCTION_TIME
+    * INC_INITIAL_PT_CONSTRUCTION_TIME
+    * INC_INITIAL_ICFG_CONSTRUCTION_TIME
+    * INC_INITIAL_DFA_SOLVING_TIME
+
+
+    * INC_INCREMENTAL_IRDB_CONSTRUCTION_TIME
+    * INC_INCREMENTAL_DELTA_CONSTRUCTION_TIME
+    * INC_INCREMENTAL_IR_REMAP_TIME
+
+    * INC_INCREMENTAL_TH_CONSTRUCTION_TIME
+    * INC_INCREMENTAL_PT_CONSTRUCTION_TIME
+    * INC_INCREMENTAL_ICFG_CONSTRUCTION_TIME
+    * INC_INCREMENTAL_DFA_SOLVING_TIME
+
+    """
+
+    @staticmethod
+    def create_empty_report() -> 'IncrementalTimings':
+        return IncrementalTimings(None)
+
+    def __init__(self, path: tp.Optional[Path]) -> None:
+        self.__wpa_irdb_construction_time: tp.List[float] = []
+        self.__wpa_th_construction_time: tp.List[float] = []
+        self.__wpa_pt_construction_time: tp.List[float] = []
+        self.__wpa_icfg_construction_time: tp.List[float] = []
+        self.__wpa_dfa_test_solving_time: tp.List[float] = []
+        self.__inc_initial_irdb_construction_time: tp.List[float] = []
+        self.__inc_initial_th_construction_time: tp.List[float] = []
+        self.__inc_initial_pt_construction_time: tp.List[float] = []
+        self.__inc_initial_icfg_construction_time: tp.List[float] = []
+        self.__inc_initial_dfa_solving_time: tp.List[float] = []
+        self.__inc_incremental_irdb_construction_time: tp.List[float] = []
+        self.__inc_incremental_delta_construction_time: tp.List[float] = []
+        self.__inc_incremental_ir_remap_time: tp.List[float] = []
+        self.__inc_incremental_th_construction_time: tp.List[float] = []
+        self.__inc_incremental_pt_construction_time: tp.List[float] = []
+        self.__inc_incremental_icfg_construction_time: tp.List[float] = []
+        self.__inc_incremental_dfa_solving_time: tp.List[float] = []
+
+        if not path:
+            return
+
+        # map each YAML key onto its measurement list instead of dispatching
+        # through a long if cascade
+        timing_map: tp.Dict[str, tp.List[float]] = {
+            'WPA_IRDB_CONSTRUCTION_TIME': self.__wpa_irdb_construction_time,
+            'WPA_TH_CONSTRUCTION_TIME': self.__wpa_th_construction_time,
+            'WPA_PT_CONSTRUCTION_TIME': self.__wpa_pt_construction_time,
+            'WPA_ICFG_CONSTRUCTION_TIME': self.__wpa_icfg_construction_time,
+            'WPA_DFA_TEST_SOLVING_TIME': self.__wpa_dfa_test_solving_time,
+            'INC_INITIAL_IRDB_CONSTRUCTION_TIME':
+                self.__inc_initial_irdb_construction_time,
+            'INC_INITIAL_TH_CONSTRUCTION_TIME':
+                self.__inc_initial_th_construction_time,
+            'INC_INITIAL_PT_CONSTRUCTION_TIME':
+                self.__inc_initial_pt_construction_time,
+            'INC_INITIAL_ICFG_CONSTRUCTION_TIME':
+                self.__inc_initial_icfg_construction_time,
+            'INC_INITIAL_DFA_SOLVING_TIME':
+                self.__inc_initial_dfa_solving_time,
+            'INC_INCREMENTAL_IRDB_CONSTRUCTION_TIME':
+                self.__inc_incremental_irdb_construction_time,
+            'INC_INCREMENTAL_DELTA_CONSTRUCTION_TIME':
+                self.__inc_incremental_delta_construction_time,
+            'INC_INCREMENTAL_IR_REMAP_TIME':
+                self.__inc_incremental_ir_remap_time,
+            'INC_INCREMENTAL_TH_CONSTRUCTION_TIME':
+                self.__inc_incremental_th_construction_time,
+            'INC_INCREMENTAL_PT_CONSTRUCTION_TIME':
+                self.__inc_incremental_pt_construction_time,
+            'INC_INCREMENTAL_ICFG_CONSTRUCTION_TIME':
+                self.__inc_incremental_icfg_construction_time,
+            'INC_INCREMENTAL_DFA_SOLVING_TIME':
+                self.__inc_incremental_dfa_solving_time,
+        }
+
+        with open(path, 'r') as stream:
+            documents = yaml.load_all(stream, Loader=yaml.CLoader)
+            for doc in documents:
+                for key, value in doc.items():
+                    if key in timing_map:
+                        timing_map[key].append(float(value))
+
+    @property
+    def wpa_irdb_construction_time(self) -> float:
+        return self.__mean(self.__wpa_irdb_construction_time)
+
+    @property
+    def wpa_th_construction_time(self) -> float:
+        return self.__mean(self.__wpa_th_construction_time)
+
+    @property
+    def wpa_pt_construction_time(self) -> float:
+        return self.__mean(self.__wpa_pt_construction_time)
+
+    @property
+    def wpa_icfg_construction_time(self) -> float:
+        return self.__mean(self.__wpa_icfg_construction_time)
+
+    @property
+    def wpa_dfa_test_solving_time(self) -> float:
+        return self.__mean(self.__wpa_dfa_test_solving_time)
+
+    def total_wpa(self) -> float:
+        return self.wpa_irdb_construction_time + \
+            self.wpa_th_construction_time + \
+            self.wpa_pt_construction_time + \
+            self.wpa_icfg_construction_time + \
+            self.wpa_dfa_test_solving_time
+
+    @property
+    def inc_initial_irdb_construction_time(self) -> float:
+        return self.__mean(self.__inc_initial_irdb_construction_time)
+
+    @property
+    def inc_initial_th_construction_time(self) -> float:
+        return self.__mean(self.__inc_initial_th_construction_time)
+
+    @property
+    def inc_initial_pt_construction_time(self) -> float:
+        return self.__mean(self.__inc_initial_pt_construction_time)
+
+    @property
+    def inc_initial_icfg_construction_time(self) -> float:
+        return self.__mean(self.__inc_initial_icfg_construction_time)
+
+    @property
+    def inc_initial_dfa_solving_time(self) -> float:
+        return self.__mean(self.__inc_initial_dfa_solving_time)
+
+    def total_initial(self) -> float:
+        return self.inc_initial_irdb_construction_time + \
+            self.inc_initial_th_construction_time + \
+            self.inc_initial_pt_construction_time + \
+            self.inc_initial_icfg_construction_time + \
+            self.inc_initial_dfa_solving_time
+
+    @property
+    def inc_incremental_irdb_construction_time(self) -> float:
+        return self.__mean(self.__inc_incremental_irdb_construction_time)
+
+    @property
+    def inc_incremental_delta_construction_time(self) -> float:
+        return self.__mean(self.__inc_incremental_delta_construction_time)
+
+    @property
+    def inc_incremental_ir_remap_time(self) -> float:
+        return self.__mean(self.__inc_incremental_ir_remap_time)
+
+    @property
+    def inc_incremental_th_construction_time(self) -> float:
+        return self.__mean(self.__inc_incremental_th_construction_time)
+
+    @property
+    def inc_incremental_pt_construction_time(self) -> float:
+        return self.__mean(self.__inc_incremental_pt_construction_time)
+
+    @property
+    def inc_incremental_icfg_construction_time(self) -> float:
+        return self.__mean(self.__inc_incremental_icfg_construction_time)
+
+    @property
+    def inc_incremental_dfa_solving_time(self) -> float:
+        return self.__mean(self.__inc_incremental_dfa_solving_time)
+
+    def total_incremental(self) -> float:
+        return self.inc_incremental_irdb_construction_time + \
+            self.inc_incremental_th_construction_time + \
+            self.inc_incremental_pt_construction_time + \
+            self.inc_incremental_icfg_construction_time + \
+            self.inc_incremental_dfa_solving_time
+
+    @staticmethod
+    def __mean(values: tp.List[float]) -> float:
+        if len(values) == 0:
+            return float('nan')
+
+        return mean(values)
+
+    def __str__(self) -> str:
+        string = f"""WPA_IRDB_CONSTRUCTION_TIME = {self.wpa_irdb_construction_time}
+WPA_TH_CONSTRUCTION_TIME = {self.wpa_th_construction_time}
+WPA_PT_CONSTRUCTION_TIME = 
{self.wpa_pt_construction_time} +WPA_ICFG_CONSTRUCTION_TIME = {self.wpa_icfg_construction_time} +WPA_DFA_TEST_SOLVING_TIME = {self.wpa_dfa_test_solving_time} +INC_INITIAL_IRDB_CONSTRUCTION_TIME = {self.inc_initial_irdb_construction_time} +INC_INITIAL_TH_CONSTRUCTION_TIME = {self.inc_initial_th_construction_time} +INC_INITIAL_PT_CONSTRUCTION_TIME = {self.inc_initial_pt_construction_time} +INC_INITIAL_ICFG_CONSTRUCTION_TIME = {self.inc_initial_icfg_construction_time} +INC_INITIAL_DFA_SOLVING_TIME = {self.inc_initial_dfa_solving_time} +INC_INCREMENTAL_IRDB_CONSTRUCTION_TIME = {self.inc_incremental_irdb_construction_time} +INC_INCREMENTAL_DELTA_CONSTRUCTION_TIME = {self.inc_incremental_delta_construction_time} +INC_INCREMENTAL_IR_REMAP_TIME = {self.inc_incremental_ir_remap_time} +INC_INCREMENTAL_TH_CONSTRUCTION_TIME = {self.inc_incremental_th_construction_time} +INC_INCREMENTAL_PT_CONSTRUCTION_TIME = {self.inc_incremental_pt_construction_time} +INC_INCREMENTAL_ICFG_CONSTRUCTION_TIME = {self.inc_incremental_icfg_construction_time} +INC_INCREMENTAL_DFA_SOLVING_TIME = {self.inc_incremental_dfa_solving_time}""" + return string + + +class IncrementalReport(BaseReport, shorthand="Inc", file_type="zip"): + """Report for phasar incremental analysis results.""" + + def __init__(self, path: Path) -> None: + super().__init__(path) + self.__ide_lca_timings = IncrementalTimings.create_empty_report() + self.__ide_typestate_timings = IncrementalTimings.create_empty_report() + self.__ifds_taint_timings = IncrementalTimings.create_empty_report() + + with tempfile.TemporaryDirectory() as tmp_result_dir: + shutil.unpack_archive(path, extract_dir=Path(tmp_result_dir)) + + for res_file in Path(tmp_result_dir).iterdir(): + print(f"{res_file=}") + if str(res_file + ).endswith('IDELinearConstantAnalysis-timings.yml'): + self.__ide_lca_timings = IncrementalTimings(res_file) + + if str(res_file).endswith('PlaceHolderTypestate-timings.yml'): + self.__ide_typestate_timings = IncrementalTimings(res_file) + + if str(res_file).endswith('PlaceHolderTaint-timings.yml'): + self.__ifds_taint_timings = IncrementalTimings(res_file) + + # TODO: impl actual file handling + collected_files = [] + for (dirpath, dirnames, filenames) in os.walk(Path(tmp_result_dir)): + collected_files.extend(filenames) + + break + + print(f"Found files: {collected_files}") + + def ide_lca_timings(self) -> IncrementalTimings: + return self.__ide_lca_timings + + def ide_typestate_timings(self) -> IncrementalTimings: + return self.__ide_typestate_timings + + def ifds_taint_timings(self) -> IncrementalTimings: + return self.__ifds_taint_timings diff --git a/varats/varats/experiments/phasar/incremental_analysis.py b/varats/varats/experiments/phasar/incremental_analysis.py new file mode 100644 index 000000000..736a47779 --- /dev/null +++ b/varats/varats/experiments/phasar/incremental_analysis.py @@ -0,0 +1,379 @@ +"""Implements experiments for evaluating different incremental analysis +approaches.""" + +import os +import shutil +import tempfile +import typing as tp +from enum import Enum +from pathlib import Path + +import benchbuild as bb +from benchbuild import Project +from benchbuild.extensions import compiler, run, time +from benchbuild.source.base import ( + RevisionStr, + target_prefix, + sources_as_dict, + Variant, + context, +) +from benchbuild.utils import actions +from benchbuild.utils.cmd import mkdir, phasar_llvm_inc +from benchbuild.utils.requirements import Requirement, SlurmMem + +from varats.data.reports.globals_report import ( + 
GlobalsReportWith,
+    GlobalsReportWithout,
+)
+from varats.data.reports.incremental_reports import (
+    IncrementalReport,
+    AnalysisType,
+)
+from varats.experiment.experiment_util import (
+    exec_func_with_pe_error_handler,
+    VersionExperiment,
+    ExperimentHandle,
+    wrap_unlimit_stack_size,
+    get_varats_result_folder,
+    create_default_compiler_error_handler,
+    create_default_analysis_failure_handler,
+    get_default_compile_error_wrapped,
+    ZippedReportFolder,
+)
+from varats.experiment.wllvm import (
+    get_cached_bc_file_path,
+    BCFileExtensions,
+    get_bc_cache_actions_for_revision,
+    RunWLLVM,
+)
+from varats.experiments.vara.blame_experiment import (
+    setup_basic_blame_experiment,
+    generate_basic_blame_experiment_actions,
+)
+from varats.project.project_util import ProjectBinaryWrapper
+from varats.report.report import FileStatusExtension as FSE
+from varats.report.report import ReportSpecification
+from varats.utils.git_util import (
+    FullCommitHash,
+    ShortCommitHash,
+    get_initial_commit,
+    get_all_revisions_between,
+)
+from varats.utils.settings import bb_cfg
+from varats.utils.util import pairwise
+
+
+def _get_enabled_analyses() -> tp.List[AnalysisType]:
+    """Allows overriding of the analyses run by an experiment; this should
+    only be used for testing purposes, as the experiment will not generate all
+    the required results."""
+    env_analysis_selection = os.getenv("PHASAR_ANALYSIS")
+    if env_analysis_selection:
+        return AnalysisType.convert_from(env_analysis_selection)
+
+    return list(AnalysisType)
+
+
+class RunAnalysisBase(actions.Step):
+    """Implements the generic steps to run the phasar-llvm-inc analysis
+    comparison tool, which compares the results of a whole-program analysis
+    with the incremental one."""
+
+    NAME = "RunAnalysisBase"
+    DESCRIPTION = "Generic comparison analysis implementation"
+    BC_FILE_EXTENSIONS = [
+        BCFileExtensions.NO_OPT,
+        BCFileExtensions.TBAA,
+        BCFileExtensions.BLAME,
+    ]
+
+    REPS = 2
+
+    def __init__(
+        self, project: Project, experiment_handle: ExperimentHandle,
+        base_revision: ShortCommitHash, analysis_type: AnalysisType
+    ) -> None:
+        super().__init__(obj=project, action_fn=self.run_analysis)
+
+        self.__experiment_handle = experiment_handle
+        self.__base_revision = base_revision
+        self.__analysis_type = analysis_type
+
+    def run_analysis(self) -> actions.StepResult:
+        """Defines and runs the analysis comparison."""
+        if not self.obj:
+            return actions.StepResult.ERROR
+        project = self.obj
+
+        vara_result_folder = get_varats_result_folder(project)
+        binary = project.binaries[0]  # we only look at one binary
+
+        params = [
+            "--module",
+            get_cached_bc_file_path(
+                project, binary, self.BC_FILE_EXTENSIONS, self.__base_revision
+            )
+        ]
+        params += self._get_extra_parameters(project, binary)
+        params += ["-D", str(self.__analysis_type)]
+
+        result_file_name = self.__experiment_handle.get_file_name(
+            IncrementalReport.shorthand(),
+            project_name=str(project.name),
+            binary_name=binary.name,
+            project_revision=project.version_of_primary,
+            project_uuid=str(project.run_uuid),
+            extension_type=FSE.SUCCESS
+        )
+        with ZippedReportFolder(
+            vara_result_folder / result_file_name.filename
+        ) as result_dir:
+            params += ["--out", result_dir]
+
+            run_cmd = phasar_llvm_inc[params]
+
+            run_cmd = wrap_unlimit_stack_size(run_cmd)
+
+            for _ in range(0, self.REPS):
+                print(f"Running: {run_cmd}")
+                exec_func_with_pe_error_handler(
+                    run_cmd,
+                    create_default_analysis_failure_handler(
+                        self.__experiment_handle, project, IncrementalReport,
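+                        # destination folder for generated failure reports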
Path(vara_result_folder)
+                    )
+                )
+
+        return actions.StepResult.OK
+
+    def _get_extra_parameters(
+        self, project: Project, binary: ProjectBinaryWrapper
+    ) -> tp.List[str]:
+        return []
+
+
+class WholeProgramAnalysis(RunAnalysisBase):
+
+    NAME = "RunWholeAnalysis"
+    DESCRIPTION = "Running the configured analysis on the whole program."
+
+
+class IncrementalProgramAnalysis(RunAnalysisBase):
+
+    NAME = "RunIncrementalAnalysis"
+    DESCRIPTION = "Running the configured analysis only on the increment " \
+        + "between two revisions."
+
+    def __init__(
+        self, project: Project, experiment_handle: ExperimentHandle,
+        base_revision: ShortCommitHash, next_revision: ShortCommitHash,
+        analysis_type: AnalysisType
+    ) -> None:
+        super().__init__(
+            project, experiment_handle, base_revision, analysis_type
+        )
+        self.__next_revision = next_revision
+
+    def _get_extra_parameters(
+        self, project: Project, binary: ProjectBinaryWrapper
+    ) -> tp.List[str]:
+        return [
+            "--inc-module",
+            str(
+                get_cached_bc_file_path(
+                    project, binary, self.BC_FILE_EXTENSIONS,
+                    self.__next_revision
+                )
+            )
+        ]
+
+
+class AnalysisComparison(IncrementalProgramAnalysis):
+
+    NAME = "RunIncWPACompAnalysis"
+    DESCRIPTION = "Running the configured analysis in both whole-program and" \
+        + " incremental style."
+
+    def _get_extra_parameters(
+        self, project: Project, binary: ProjectBinaryWrapper
+    ) -> tp.List[str]:
+        return super()._get_extra_parameters(project, binary) + [
+            "--wpa-inc-in-memory-comparison"
+        ]
+
+
+class PrecisionComparisonBase(VersionExperiment, shorthand=""):
+    """Implementation base for the incremental analysis evaluation."""
+
+    NAME = "PrecisionComparisonBase"
+
+    REPORT_SPEC = ReportSpecification(IncrementalReport)
+
+    def __init__(
+        self, revision_step_width: int, max_revisions_to_explore: int,
+        analysis: tp.Type[IncrementalProgramAnalysis], *args: tp.Any,
+        **kwargs: tp.Any
+    ) -> None:
+        super().__init__(*args, **kwargs)
+        self.__revision_step_width = revision_step_width
+        self.__max_revisions_to_explore = max_revisions_to_explore
+        self.__analysis = analysis
+
+    def actions_for_project(
+        self, project: Project
+    ) -> tp.MutableSequence[actions.Step]:
+
+        setup_basic_blame_experiment(
+            self, project,
+            self.report_spec().main_report
+        )
+
+        analysis_actions = []
+        analysis_actions.extend(
+            generate_basic_blame_experiment_actions(
+                project, RunAnalysisBase.BC_FILE_EXTENSIONS,
+                create_default_compiler_error_handler(
+                    self.get_handle(), project, self.REPORT_SPEC.main_report
+                )
+            )
+        )
+
+        # Computes list of revisions that should be analyzed
+        revision_list = self.compute_revisions_to_explore(project)
+
+        # Generate all required bc files for analysis
+        for next_revision in revision_list[1:]:
+            analysis_actions.append(
+                actions.SetProjectVersion(
+                    project, RevisionStr(next_revision.hash)
+                )
+            )
+
+            analysis_actions.extend(
+                get_bc_cache_actions_for_revision(
+                    project, next_revision, RunAnalysisBase.BC_FILE_EXTENSIONS,
+                    create_default_compiler_error_handler(
+                        self.get_handle(), project, self.REPORT_SPEC.main_report
+                    )
+                )
+            )
+
+        # TODO (python3.10): replace with itertools.pairwise
+        for base_revision, next_revision in pairwise(reversed(revision_list)):
+            print(f"Compare From: {base_revision} -> {next_revision}")
+            analysis_actions.append(
+                actions.SetProjectVersion(
+                    project, RevisionStr(next_revision.hash)
+                )
+            )
+
+            for enabled_analysis_type in _get_enabled_analyses():
+                # Run all analysis steps
+                analysis_actions.append(
+                    self.__analysis(
+                        project, self.get_handle(),
base_revision,
+                        next_revision, enabled_analysis_type
+                    )
+                )
+
+        # Clean up the generated files afterwards
+        analysis_actions.append(actions.Clean(project))
+
+        return analysis_actions
+
+    def compute_revisions_to_explore(
+        self, project: Project
+    ) -> tp.List[ShortCommitHash]:
+        """Computes the list of revisions that should be explored by this
+        analysis."""
+        project_repo_git = Path(target_prefix()) / Path(project.primary_source)
+        return get_all_revisions_between(
+            get_initial_commit(project_repo_git).hash,
+            project.version_of_primary, ShortCommitHash, project_repo_git
+        )[::-self.__revision_step_width][:self.__max_revisions_to_explore]
+
+
+# Actual, full-scale experiments
+
+
+class RunPhasarIncWPA(VersionExperiment, shorthand="PIWPA"):
+    """Run the analyses in WPA style."""
+
+    NAME = "PIWPA"
+
+    REPORT_SPEC = ReportSpecification(IncrementalReport)
+
+    def actions_for_project(
+        self, project: Project
+    ) -> tp.MutableSequence[actions.Step]:
+
+        setup_basic_blame_experiment(
+            self, project,
+            self.report_spec().main_report
+        )
+
+        analysis_actions = []
+        analysis_actions.extend(
+            generate_basic_blame_experiment_actions(
+                project, RunAnalysisBase.BC_FILE_EXTENSIONS,
+                create_default_compiler_error_handler(
+                    self.get_handle(), project, self.REPORT_SPEC.main_report
+                )
+            )
+        )
+
+        for enabled_analysis_type in _get_enabled_analyses():
+            analysis_actions.append(
+                WholeProgramAnalysis(
+                    project, self.get_handle(),
+                    ShortCommitHash(project.version_of_primary),
+                    enabled_analysis_type
+                )
+            )
+
+        # Clean up the generated files afterwards
+        analysis_actions.append(actions.Clean(project))
+
+        return analysis_actions
+
+
+class RunPhasarIncIncremental(PrecisionComparisonBase, shorthand="PIInc"):
+    """Run the analyses in incremental style."""
+
+    NAME = "PIInc"
+
+    def __init__(self, *args: tp.Any, **kwargs: tp.Any) -> None:
+        super().__init__(1, 2, IncrementalProgramAnalysis, *args, **kwargs)
+
+
+class RunPhasarIncCompare(PrecisionComparisonBase, shorthand="PIComp"):
+    """Run the analyses in incremental as well as WPA style and compare their
+    results."""
+
+    NAME = "PIComp"
+
+    def __init__(self, *args: tp.Any, **kwargs: tp.Any) -> None:
+        super().__init__(1, 2, AnalysisComparison, *args, **kwargs)
+
+
+class IncrementalAnalysisPrecisionComparisonS1(
+    PrecisionComparisonBase, shorthand="IncAPCs1"
+):
+    """Evaluation of the incremental analysis, using a 1 rev step width."""
+
+    NAME = "IncAPCs1"
+
+    def __init__(self, *args: tp.Any, **kwargs: tp.Any) -> None:
+        super().__init__(1, 3, AnalysisComparison, *args, **kwargs)
+
+
+class IncrementalAnalysisPrecisionComparisonS5(
+    PrecisionComparisonBase, shorthand="IncAPCs5"
+):
+    """Evaluation of the incremental analysis, using a 5 rev step width."""
+
+    NAME = "IncAPCs5"
+
+    def __init__(self, *args: tp.Any, **kwargs: tp.Any) -> None:
+        super().__init__(5, 3, AnalysisComparison, *args, **kwargs)
diff --git a/varats/varats/jupyterhelper/file.py b/varats/varats/jupyterhelper/file.py
index 501d2c1d8..e20a20012 100644
--- a/varats/varats/jupyterhelper/file.py
+++ b/varats/varats/jupyterhelper/file.py
@@ -15,6 +15,7 @@
     GlobalsReportWith,
     GlobalsReportWithout,
 )
+from varats.data.reports.incremental_reports import IncrementalReport
 from varats.data.reports.szz_report import (
     SZZUnleashedReport,
     SZZReport,
@@ -118,6 +119,14 @@ def load_globals_without_report(file_path: Path) -> \
     return VDM.load_data_class_sync(file_path, GlobalsReportWithout)
 
 
+def load_incremental_report(file_path: Path) -> IncrementalReport:
+    """
+    Load an
IncrementalReport from a file.
+
+    Attributes:
+        file_path (Path): Full path to the file
+    """
+    return VDM.load_data_class_sync(file_path, IncrementalReport)
+
+
 def load_feature_analysis_report(file_path: Path) -> \
     FeatureAnalysisReport:
     """
diff --git a/varats/varats/paper_mgmt/paper_config.py b/varats/varats/paper_mgmt/paper_config.py
index 7a07a5f45..cc8bcdde7 100644
--- a/varats/varats/paper_mgmt/paper_config.py
+++ b/varats/varats/paper_mgmt/paper_config.py
@@ -296,6 +296,9 @@ def __init__(
         super().__init__(*args, **kwargs)
         self.__project_name = project_name
 
+    def explore(self) -> tp.Iterable[bb.source.base.Variant]:
+        return super().versions()
+
     def versions(self) -> tp.List[bb.source.base.Variant]:
         proj_filter = project_filter_generator(self.__project_name)
diff --git a/varats/varats/plots/incremental_eval.py b/varats/varats/plots/incremental_eval.py
new file mode 100644
index 000000000..3468c2ec1
--- /dev/null
+++ b/varats/varats/plots/incremental_eval.py
@@ -0,0 +1,371 @@
+"""Module for incremental analysis evaluation plots."""
+
+import logging
+import math
+import typing as tp
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+
+from varats.data.reports.incremental_reports import (
+    IncrementalReport,
+    IncrementalTimings,
+)
+from varats.jupyterhelper.file import load_incremental_report
+from varats.paper_mgmt.case_study import get_case_study_file_name_filter
+from varats.paper_mgmt.paper_config import get_loaded_paper_config
+from varats.plot.plot import Plot, PlotDataEmpty
+from varats.plot.plots import PlotGenerator, PlotConfig
+from varats.revision.revisions import get_processed_revisions_files
+from varats.utils.git_util import FullCommitHash
+
+LOG = logging.getLogger(__name__)
+
+
+def _round_delta(base_line: float, increment: float) -> float:
+    delta = increment - float(base_line)
+    per_delta = delta / float(base_line) * 100
+    per_delta = round(per_delta, 2)
+    return per_delta
+
+
+class PhasarIncRevisionDeltaViolinPlot(Plot, plot_name='psr_inc_rev_deltas'):
+    """Violin plot to visualize incremental speed deltas for all revisions."""
+
+    def __init__(self, plot_config: PlotConfig, **kwargs: tp.Any) -> None:
+        super().__init__(self.NAME, plot_config, **kwargs)
+
+    def plot(self, view_mode: bool) -> None:
+        case_studies = get_loaded_paper_config().get_all_case_studies()
+
+        rev_deltas: tp.List[tp.Dict[str, tp.Any]] = []
+        project_names: tp.Set[str] = set()
+        for case_study in case_studies:
+            project_name = case_study.project_name
+            print(f"Processing: {project_name=}")
+
+            report_files = get_processed_revisions_files(
+                case_study.project_name, IncrementalReport,
+                get_case_study_file_name_filter(case_study)
+            )
+
+            if not report_files:
+                continue
+
+            for report_file in report_files:
+                report = load_incremental_report(report_file)
+
+                rev_delta_lca = _round_delta(
+                    report.ide_lca_timings().total_wpa(),
+                    report.ide_lca_timings().total_incremental()
+                )
+                rev_delta_taint = _round_delta(
+                    report.ifds_taint_timings().total_wpa(),
+                    report.ifds_taint_timings().total_incremental()
+                )
+                rev_delta_typestate = _round_delta(
+                    report.ide_typestate_timings().total_wpa(),
+                    report.ide_typestate_timings().total_incremental()
+                )
+
+                if math.isnan(rev_delta_lca):
+                    continue
+
+                rev_deltas.append({
+                    "Project": project_name,
+                    "TimeDeltaLCA": rev_delta_lca,
+                    "TimeDeltaTaint": rev_delta_taint,
+                    "TimeDeltaTypestate": rev_delta_typestate
+                })
+
+                project_names.add(project_name)
+
+        if not rev_deltas:
+            LOG.warning("There were no projects found with enough data points.")
+            raise PlotDataEmpty
+
data = pd.DataFrame(rev_deltas)
+        print(f"{data=}")
+
+        fig, axes = plt.subplots(3, 1, sharex=True, sharey=True)
+        fig.subplots_adjust(hspace=0.03)
+        box_style = dict(boxstyle='round', facecolor='blue', alpha=0.3)
+        box_fontsize = 8
+
+        # Plot - LCA
+        sns.violinplot(
+            ax=axes[0],
+            x="Project",
+            y="TimeDeltaLCA",
+            data=data,
+            order=sorted(project_names),
+            inner=None,
+            linewidth=1,
+            color=".95"
+        )
+        sns.stripplot(
+            ax=axes[0],
+            x="Project",
+            y="TimeDeltaLCA",
+            data=data,
+            order=sorted(project_names),
+            alpha=.25,
+            size=3
+        )
+        axes[0].text(
+            0.95,
+            0.9,
+            "LCA",
+            transform=axes[0].transAxes,
+            fontsize=box_fontsize,
+            verticalalignment='top',
+            horizontalalignment='right',
+            bbox=box_style
+        )
+
+        # Plot - Taint
+        sns.violinplot(
+            ax=axes[1],
+            x="Project",
+            y="TimeDeltaTaint",
+            data=data,
+            order=sorted(project_names),
+            inner=None,
+            linewidth=1,
+            color=".95"
+        )
+        sns.stripplot(
+            ax=axes[1],
+            x="Project",
+            y="TimeDeltaTaint",
+            data=data,
+            order=sorted(project_names),
+            alpha=.25,
+            size=3
+        )
+        box_style['facecolor'] = 'green'
+        axes[1].text(
+            0.95,
+            0.9,
+            "Taint",
+            transform=axes[1].transAxes,
+            fontsize=box_fontsize,
+            verticalalignment='top',
+            horizontalalignment='right',
+            bbox=box_style
+        )
+
+        # Plot - Typestate
+        sns.violinplot(
+            ax=axes[2],
+            x="Project",
+            y="TimeDeltaTypestate",
+            data=data,
+            order=sorted(project_names),
+            inner=None,
+            linewidth=1,
+            color=".95"
+        )
+        sns.stripplot(
+            ax=axes[2],
+            x="Project",
+            y="TimeDeltaTypestate",
+            data=data,
+            order=sorted(project_names),
+            alpha=.25,
+            size=3
+        )
+        box_style['facecolor'] = 'red'
+        axes[2].text(
+            0.95,
+            0.9,
+            "Typestate",
+            transform=axes[2].transAxes,
+            fontsize=box_fontsize,
+            verticalalignment='top',
+            horizontalalignment='right',
+            bbox=box_style
+        )
+
+        for ax in axes:
+            # ax.set_ylim(-0.1, 1.1)
+            ax.set_aspect(0.3 / ax.get_data_ratio())
+            ax.tick_params(axis='x', labelrotation=45)
+            ax.set_xlabel(None)
+            ax.set_ylabel(None)
+
+        axes[1].set_ylabel("Analysis Time Reduction in %")
+
+    def calc_missing_revisions(
+        self, boundary_gradient: float
+    ) -> tp.Set[FullCommitHash]:
+        raise NotImplementedError
+
+
+class PIRDViolinPlotGenerator(
+    PlotGenerator, generator_name="psr-inc-rev-deltas", options=[]
+):
+    """Generates a violin plot showing the distribution of incremental analysis
+    speedup deltas for each case study."""
+
+    def generate(self) -> tp.List[Plot]:
+        return [
+            PhasarIncRevisionDeltaViolinPlot(
+                self.plot_config, **self.plot_kwargs
+            )
+        ]
+
+
+class PhasarIncHelperAnalysisViolinPlot(
+    Plot, plot_name='psr_inc_helper_shares'
+):
+    """Violin plot to visualize incremental speed deltas for helper
+    analyses."""
+
+    def __init__(self, plot_config: PlotConfig, **kwargs: tp.Any) -> None:
+        super().__init__(self.NAME, plot_config, **kwargs)
+
+    def plot(self, view_mode: bool) -> None:
+        case_studies = get_loaded_paper_config().get_all_case_studies()
+
+        rev_deltas: tp.List[tp.Dict[str, tp.Any]] = []
+        project_names: tp.Set[str] = set()
+        for case_study in case_studies:
+            project_name = case_study.project_name
+            print(f"Processing: {project_name=}")
+
+            report_files = get_processed_revisions_files(
+                case_study.project_name, IncrementalReport,
+                get_case_study_file_name_filter(case_study)
+            )
+
+            if not report_files:
+                continue
+
+            for report_file in report_files:
+                report = load_incremental_report(report_file)
+
+                def build_data_for_timings(
+                    timings: IncrementalTimings, analysis: str
+                ) -> None:
+                    irdb = timings.inc_incremental_irdb_construction_time
+                    irdb_delta = _round_delta(
+                        timings.inc_initial_irdb_construction_time,
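+                        # baseline: initial (from-scratch) time, compared
+                        # against the incremental re-run time below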
timings.inc_incremental_irdb_construction_time + ) + + th = timings.inc_incremental_th_construction_time + th_delta = _round_delta( + timings.inc_initial_th_construction_time, + timings.inc_incremental_th_construction_time + ) + + pt = timings.inc_incremental_pt_construction_time + pt_delta = _round_delta( + timings.inc_initial_pt_construction_time, + timings.inc_incremental_pt_construction_time + ) + + icfg = timings.inc_incremental_icfg_construction_time + icfg_delta = _round_delta( + timings.inc_initial_icfg_construction_time, + timings.inc_incremental_icfg_construction_time + ) + + dfa = timings.inc_incremental_dfa_solving_time + dfa_delta = _round_delta( + timings.inc_initial_dfa_solving_time, + timings.inc_incremental_dfa_solving_time + ) + + total = irdb + th + pt + icfg + dfa + + rev_deltas.append({ + "Project": project_name, + "Analysis": analysis, + "AnalysisPart": "IRDB", + "Proportion": irdb / total, + "Delta": irdb_delta + }) + rev_deltas.append({ + "Project": project_name, + "Analysis": analysis, + "AnalysisPart": "TH", + "Proportion": th / total, + "Delta": th_delta + }) + rev_deltas.append({ + "Project": project_name, + "Analysis": analysis, + "AnalysisPart": "PT", + "Proportion": pt / total, + "Delta": pt_delta + }) + rev_deltas.append({ + "Project": project_name, + "Analysis": analysis, + "AnalysisPart": "ICFG", + "Proportion": icfg / total, + "Delta": icfg_delta + }) + rev_deltas.append({ + "Project": project_name, + "Analysis": analysis, + "AnalysisPart": "DFA", + "Proportion": dfa / total, + "Delta": dfa_delta + }) + + build_data_for_timings(report.ide_lca_timings(), "lca") + build_data_for_timings( + report.ide_typestate_timings(), "typestate" + ) + build_data_for_timings(report.ifds_taint_timings(), "taint") + + if not rev_deltas: + LOG.warning("There were no projects found with enough data points.") + raise PlotDataEmpty + + helper_analyses = ["IRDB", "TH", "PT", "ICFG", "DFA"] + + data = pd.DataFrame(rev_deltas) + # pd.set_option("display.max_rows", None, "display.max_columns", None) + print(f"{data=}") + ax = sns.violinplot( + x="AnalysisPart", + # y="Proportion", + y="Delta", + data=data, + order=helper_analyses, + inner=None, + linewidth=1, + color=".95" + ) + sns.stripplot( + x="AnalysisPart", + # y="Proportion", + y="Delta", + data=data, + order=helper_analyses, + alpha=.25, + size=3 + ) + # ax.set_ylim(-0.05, None) + ax.set_aspect(0.3 / ax.get_data_ratio()) + ax.tick_params(axis='x', labelrotation=45) + ax.set_xlabel(None) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise NotImplementedError + + +class PIHAViolinPlotGenerator( + PlotGenerator, generator_name="psr-inc-helper-shares", options=[] +): + """Generates a violin plot showing the distribution of incremental analysis + speedup deltas for each helper analysis.""" + + def generate(self) -> tp.List[Plot]: + return [ + PhasarIncHelperAnalysisViolinPlot( + self.plot_config, **self.plot_kwargs + ) + ] diff --git a/varats/varats/projects/c_projects/gravity.py b/varats/varats/projects/c_projects/gravity.py index 6a2aa84fb..398ccacb8 100644 --- a/varats/varats/projects/c_projects/gravity.py +++ b/varats/varats/projects/c_projects/gravity.py @@ -99,11 +99,10 @@ def compile(self) -> None: # commit 46133fb47d6da1f0dec27ae23db1d633bc72e9e3 introduced # cmake as build system - with local.cwd(gravity_git_path): - cmake_revisions = get_all_revisions_between( - "dbb4d61fc2ebb9aca44e8e6bb978efac4a6def87", "master", - ShortCommitHash - ) + cmake_revisions = 
get_all_revisions_between( + "dbb4d61fc2ebb9aca44e8e6bb978efac4a6def87", "master", + ShortCommitHash, gravity_git_path + ) if gravity_version in cmake_revisions: self.__compile_cmake() diff --git a/varats/varats/projects/test_projects/commit_inc_scenarios.py b/varats/varats/projects/test_projects/commit_inc_scenarios.py new file mode 100644 index 000000000..da6a2f6ec --- /dev/null +++ b/varats/varats/projects/test_projects/commit_inc_scenarios.py @@ -0,0 +1,381 @@ +import typing as tp + +import benchbuild as bb +from benchbuild.utils.cmd import cmake, make, mkdir +from benchbuild.utils.settings import get_number_of_jobs +from plumbum import local + +from varats.paper_mgmt.paper_config import project_filter_generator +from varats.project.project_domain import ProjectDomains +from varats.project.project_util import ( + ProjectBinaryWrapper, + wrap_paths_to_binaries, + BinaryType, + verify_binaries, +) +from varats.project.varats_project import VProject +from varats.ts_utils.project_sources import VaraTestRepoSource +from varats.utils.git_util import ShortCommitHash +from varats.utils.settings import bb_cfg + + +class VCSTestBasic01(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestBasic01" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestBasic01", + remote="VCSAnalysisRepos/Basic01", + local="VCSTestBasic01", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Contains instructions on how to build the project.""" + + basic_01_version_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(basic_01_version_source): + bb.watch(c_compiler)("main.c", "-o", "main") + + verify_binaries(self) + + +class VCSTestBasic02(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestBasic02" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestBasic02", + remote="VCSAnalysisRepos/Basic02", + local="VCSTestBasic02", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Contains instructions on how to build the project.""" + + basic_02_version_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(basic_02_version_source): + bb.watch(c_compiler + )("main.c", "-Wl,--warn-unresolved-symbols", "-o", "main") + + verify_binaries(self) + + +class VCSTestBasic03(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestBasic03" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestBasic03", + remote="VCSAnalysisRepos/Basic03", + local="VCSTestBasic03", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> 
None: + """Contains instructions on how to build the project.""" + + basic_03_version_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(basic_03_version_source): + bb.watch(c_compiler + )("main.c", "-Wl,--warn-unresolved-symbols", "-o", "main") + + verify_binaries(self) + + +class VCSTestBasic04(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestBasic04" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestBasic04", + remote="VCSAnalysisRepos/Basic04", + local="VCSTestBasic04", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Contains instructions on how to build the project.""" + + basic_04_version_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(basic_04_version_source): + bb.watch(c_compiler)("main.c", "-o", "main") + + verify_binaries(self) + + +class VCSTestBasic05(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestBasic05" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestBasic05", + remote="VCSAnalysisRepos/Basic05", + local="VCSTestBasic05", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Contains instructions on how to build the project.""" + + basic_05_version_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(basic_05_version_source): + bb.watch(c_compiler)("main.c", "-o", "main") + + verify_binaries(self) + + +class VCSTestCall01(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestCall01" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestCall01", + remote="VCSAnalysisRepos/Call01", + local="VCSTestCall01", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Contains instructions on how to build the project.""" + + call_01_version_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(call_01_version_source): + bb.watch(c_compiler)("main.c", "-o", "main") + + verify_binaries(self) + + +class VCSTestDeletionWithInteraction(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestDeletionWithInteraction" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestDeletionWithInteraction", + remote="VCSAnalysisRepos/DeletionWithInteraction", + local="VCSTestDeletionWithInteraction", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + 
return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Contains instructions on how to build the project.""" + + del_with_inter_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(del_with_inter_source): + bb.watch(c_compiler)("main.c", "-o", "main") + + verify_binaries(self) + + +class VCSTestDeletionWithoutInteraction(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestDeletionWithoutInteraction" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestDeletionWithoutInteraction", + remote="VCSAnalysisRepos/DeletionWithoutInteraction", + local="VCSTestDeletionWithoutInteraction", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Contains instructions on how to build the project.""" + + del_without_inter_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(del_without_inter_source): + bb.watch(c_compiler)("main.c", "-o", "main") + + verify_binaries(self) + + +class VCSTestMergeExample01(VProject): + """Test scenario for the incremental analysis.""" + + NAME = "VCSTestMergeExample01" + GROUP = "test_projects" + DOMAIN = ProjectDomains.TEST + + SOURCE = [ + VaraTestRepoSource( + project_name="VCSTestMergeExample01", + remote="VCSAnalysisRepos/MergeExample01", + local="VCSTestMergeExample01", + refspec="HEAD", + limit=None, + shallow=False + ) + ] + + @staticmethod + def binaries_for_revision( + revision: ShortCommitHash + ) -> tp.List[ProjectBinaryWrapper]: + return wrap_paths_to_binaries([("main", BinaryType.EXECUTABLE)]) + + def run_tests(self) -> None: + pass + + def compile(self) -> None: + """Contains instructions on how to build the project.""" + + merge_example_01_source = local.path(self.source_of_primary) + + c_compiler = bb.compiler.cc(self) + + with local.cwd(merge_example_01_source): + bb.watch(c_compiler)("main.c", "-o", "main") + + verify_binaries(self) diff --git a/varats/varats/table/table_utils.py b/varats/varats/table/table_utils.py index d6e1d2f1b..e3a908a1e 100644 --- a/varats/varats/table/table_utils.py +++ b/varats/varats/table/table_utils.py @@ -10,7 +10,10 @@ def wrap_table_in_latex_document( - table: str, landscape: bool = False, margin: float = 1.5 + table: str, + landscape: bool = False, + margin: float = 1.5, + document_decorator: tp.Callable[[Document], None] = lambda x: x ) -> str: """ Wraps a table inside a proper latex document. 
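
Review note: the `document_decorator` hook added in the next hunks lets callers inject extra preamble into the wrapping LaTeX document. A small illustrative sketch (the decorator and dataframe are made up, mirroring the `add_doc_defs` callback used in `incremental_eval.py` further down):

```python
import pandas as pd
from pylatex import Document, Package

from varats.table.table_utils import dataframe_to_table
from varats.table.tables import TableFormat

def _add_extra_packages(doc: Document) -> None:
    # extend the preamble of the generated standalone document
    doc.packages.append(Package("tikz"))

latex_table = dataframe_to_table(
    pd.DataFrame({"metric": [1.0, 2.0]}),
    TableFormat.LATEX,  # assuming TableFormat's LATEX member
    wrap_table=True,
    document_decorator=_add_extra_packages,
)
```
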
@@ -39,6 +42,7 @@ def wrap_table_in_latex_document( Package("hyperref"), Package("longtable"), Package("multirow"), + Package('multicol'), Package("xcolor", options=["table"]), ]) @@ -47,6 +51,8 @@ def wrap_table_in_latex_document( # embed latex table inside document doc.append(NoEscape(table)) + document_decorator(doc) + return tp.cast(str, doc.dumps()) @@ -56,6 +62,7 @@ def dataframe_to_table( style: tp.Optional["pd.io.formats.style.Styler"] = None, wrap_table: bool = False, wrap_landscape: bool = False, + document_decorator: tp.Callable[[Document], None] = lambda x: x, **kwargs: tp.Any ) -> str: """ @@ -82,7 +89,9 @@ def dataframe_to_table( if table_format.is_latex(): table = style.to_latex(**kwargs) if wrap_table: - table = wrap_table_in_latex_document(table, wrap_landscape) + table = wrap_table_in_latex_document( + table, wrap_landscape, document_decorator=document_decorator + ) elif table_format.is_html(): table = style.to_html(**kwargs) diff --git a/varats/varats/tables/incremental_eval.py b/varats/varats/tables/incremental_eval.py new file mode 100644 index 000000000..5d227c0bb --- /dev/null +++ b/varats/varats/tables/incremental_eval.py @@ -0,0 +1,289 @@ +import typing as tp + +import numpy as np +import pandas as pd +from benchbuild.utils.cmd import git +from pylatex import Document, Package +from scipy.stats import gmean +from tabulate import tabulate + +from varats.data.reports.incremental_reports import ( + AnalysisType, + IncrementalReport, +) +from varats.jupyterhelper.file import load_incremental_report +from varats.paper.case_study import CaseStudy +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.paper_mgmt.paper_config import get_loaded_paper_config +from varats.project.project_util import ( + get_local_project_git, + get_project_cls_by_name, +) +from varats.revision.revisions import get_processed_revisions_files +from varats.table.table import Table +from varats.table.table_utils import ( + wrap_table_in_latex_document, + dataframe_to_table, +) +from varats.table.tables import TableFormat, TableGenerator +from varats.ts_utils.click_param_types import REQUIRE_MULTI_CASE_STUDY +from varats.utils.git_util import calc_repo_loc + + +def _round_and_format_delta(base_line: float, increment: float) -> float: + delta = increment - float(base_line) + per_delta = delta / float(base_line) * 100 + per_delta = round(per_delta, 2) + return per_delta + + +def create_df_for_report(report: IncrementalReport, project_name, t): + cs_dict: tp.Dict[tp.Tuple[str, str], tp.Any] = {} + + cs_dict[("taint", "WPA")] = report.ifds_taint_timings().total_wpa() + cs_dict[("taint", "INC")] = report.ifds_taint_timings().total_incremental() + cs_dict[("taint", "delta")] = 0 + + cs_dict[("lca", "WPA")] = report.ide_lca_timings().total_wpa() + cs_dict[("lca", "INC")] = report.ide_lca_timings().total_incremental() + cs_dict[("lca", "delta")] = 0 + + cs_dict[("typestate", "WPA")] = report.ide_typestate_timings().total_wpa() + cs_dict[("typestate", "INC") + ] = report.ide_typestate_timings().total_incremental() + cs_dict[("typestate", "delta")] = 0 + + return pd.DataFrame.from_dict({project_name: cs_dict}, orient="index") + + +from random import seed, randint + +seed(1) + + +def create_df_for_report_fake(project_name): + + cs_dict: tp.Dict[tp.Tuple[str, str], tp.Any] = {} + + cs_dict[("taint", "WPA")] = randint(1, 100) + cs_dict[("taint", "INC")] = randint(1, 100) + cs_dict[("taint", "delta")] = 0 + + cs_dict[("lca", "WPA")] = randint(1, 100) + cs_dict[("lca", "INC")] = 
randint(1, 100)
+    cs_dict[("lca", "delta")] = 0
+
+    cs_dict[("typestate", "WPA")] = randint(1, 100)
+    cs_dict[("typestate", "INC")] = randint(1, 100)
+    cs_dict[("typestate", "delta")] = 0
+
+    return pd.DataFrame.from_dict({project_name: cs_dict}, orient="index")
+
+
+def _color_and_format_delta_cell(x: float) -> str:
+    if x > 0:
+        return "\\cellcolor{cellRed}" + str(x) + "\\%"
+
+    return "\\cellcolor{cellGreen}" + str(x) + "\\%"
+
+
+class PhasarIncrementalDataComparison(Table, table_name="phasar_inc_overview"):
+    """Comparison overview of gathered phasar-incremental analysis data to
+    compare the effect of running an analysis incrementally."""
+
+    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
+        case_studies: tp.List[CaseStudy] = get_loaded_paper_config(
+        ).get_all_case_studies()
+
+        cs_data: tp.List[pd.DataFrame] = []
+
+        for case_study in case_studies:
+            report_files = get_processed_revisions_files(
+                case_study.project_name, IncrementalReport,
+                get_case_study_file_name_filter(case_study)
+            )
+            print(f"{report_files=}")
+
+            # binary = case_study.project_cls.binaries_for_revision(
+            #     case_study.revisions[0]
+            # )[0]
+
+            project_name = case_study.project_name
+
+            current_cs_data: tp.List[pd.DataFrame] = []
+
+            for report_file in report_files:
+                report = load_incremental_report(report_file)
+                print(report.ide_lca_timings())
+                # cs_data.append(create_df_for_report(report, project_name, 1))
+                current_cs_data.append(
+                    create_df_for_report(report, project_name, 1)
+                )
+
+            # current_cs_data.append(create_df_for_report_fake(project_name))
+            # current_cs_data.append(create_df_for_report_fake(project_name))
+            # current_cs_data.append(create_df_for_report_fake(project_name))
+            # current_cs_data.append(create_df_for_report_fake(project_name))
+
+            df = pd.concat(current_cs_data)
+            df = df.agg(['mean'])
+            df.rename(index={'mean': project_name}, inplace=True)
+
+            df[("taint", "delta")] = _round_and_format_delta(
+                df[("taint", "WPA")], df[("taint", "INC")]
+            )
+            df[
+                ("lca", "delta")
+            ] = _round_and_format_delta(df[("lca", "WPA")], df[("lca", "INC")])
+            df[("typestate", "delta")] = _round_and_format_delta(
+                df[("typestate", "WPA")], df[("typestate", "INC")]
+            )
+
+            df[("total", "WPA")] = gmean([
+                df[("taint", "WPA")], df[("lca", "WPA")],
+                df[("typestate", "WPA")]
+            ])
+            df[("total", "INC")] = gmean([
+                df[("taint", "INC")], df[("lca", "INC")],
+                df[("typestate", "INC")]
+            ])
+            df[("total", "delta")] = _round_and_format_delta(
+                df[("total", "WPA")], df[("total", "INC")]
+            )
+
+            cs_data.append(df)
+
+        df = pd.concat(cs_data)
+        df = df.round(2)
+
+        df[('taint', 'delta')
+          ] = df[('taint', 'delta')].apply(_color_and_format_delta_cell)
+        df[('lca', 'delta')
+          ] = df[('lca', 'delta')].apply(_color_and_format_delta_cell)
+        df[('typestate', 'delta')
+          ] = df[('typestate', 'delta')].apply(_color_and_format_delta_cell)
+        df[('total', 'delta')
+          ] = df[('total', 'delta')].apply(_color_and_format_delta_cell)
+
+        # Do final formatting of column names
+        df.rename(
+            columns={
+                "taint": "Taint",
+                "lca": "LCA",
+                "typestate": "Typestate",
+                "total": "Total",
+                "WPA": "\\multicolumn{1}{c}{WPA}",
+                "INC": "\\multicolumn{1}{c}{INC}",
+                "delta": "\\multicolumn{1}{c}{$\\Delta$}"
+            },
+            inplace=True
+        )
+
+        table_kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
+        if table_format.is_latex():
+            df.style.format('j')
+            caption = ("TEST")
+
+            # table_kwargs["index"] = True
+            table_kwargs["escape"] = False
+            table_kwargs["column_format"] = "lccc|ccc|ccc|ccc"
+            # table_kwargs["bold_rows"] = True
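+            # the remaining kwargs are forwarded unchanged to pandas'
+            # Styler.to_latex by dataframe_to_table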
+            table_kwargs["multicolumn_format"] = "c"
+            table_kwargs["multicolumn"] = True
+            # table_kwargs["longtable"] = True
+            table_kwargs["caption"] = caption
+            # table_kwargs["float_format"] = '{:0.2f}'.format
+            table_kwargs["float_format"] = '%.2f'
+
+        def add_doc_defs(doc: Document) -> None:
+            doc.packages.append(Package('xcolor'))
+            doc.packages.append(Package('colortbl'))
+            doc.add_color("cellGreen", model="HTML", description="66ff33")
+            doc.add_color("cellRed", model="HTML", description="ff3333")
+
+        return dataframe_to_table(
+            df,
+            table_format,
+            wrap_table,
+            wrap_landscape=True,
+            document_decorator=add_doc_defs,
+            **table_kwargs
+        )
+
+
+class PhasarIncrementalDataComparisonGenerator(
+    TableGenerator, generator_name="phasar-inc-overview", options=[]
+):
+    """Generates the phasar incremental analysis comparison overview table."""
+
+    def generate(self) -> tp.List[Table]:
+        return [
+            PhasarIncrementalDataComparison(
+                self.table_config, **self.table_kwargs
+            )
+        ]
+
+
+class PhasarIncMetricsTable(Table, table_name="phasar_inc_cs_metrics"):
+    """Table showing some general information about the case studies in a paper
+    config."""
+
+    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
+        case_studies = get_loaded_paper_config().get_all_case_studies()
+
+        cs_data: tp.List[pd.DataFrame] = []
+        for case_study in case_studies:
+            project_name = case_study.project_name
+            project_cls = get_project_cls_by_name(project_name)
+            project_repo = get_local_project_git(project_name)
+            project_path = project_repo.path[:-5]
+            project_git = git["-C", project_path]
+
+            revision = self.table_kwargs.get("revisions", {}).get(
+                case_study.project_name, None
+            )
+            revisions = case_study.revisions
+            if not revision and len(revisions) == 1:
+                revision = revisions[0]
+            rev_range = revision.hash if revision else "HEAD"
+
+            cs_dict = {
+                project_name: {
+                    "Domain":
+                        str(project_cls.DOMAIN)[0].upper() +
+                        str(project_cls.DOMAIN)[1:],
+                    "LOC":
+                        calc_repo_loc(project_repo, rev_range),
+                    "Commits":
+                        int(project_git("rev-list", "--count", rev_range)),
+                    # "Authors":
+                    #     len(
+                    #         project_git("shortlog", "-s",
+                    #                     rev_range).splitlines()
+                    #     )
+                    "Samples":
+                        len(case_study.revisions)
+                }
+            }
+            # if revision:
+            #     cs_dict[project_name]["Revision"] = revision.short_hash
+
+            cs_data.append(pd.DataFrame.from_dict(cs_dict, orient="index"))
+
+        df = pd.concat(cs_data).sort_index()
+
+        table_kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
+        if table_format.is_latex():
+            table_kwargs["multicolumn_format"] = "c"
+            table_kwargs["multirow"] = True
+
+        return dataframe_to_table(
+            df, table_format, wrap_table, wrap_landscape=True, **table_kwargs
+        )
+
+
+class PhasarIncMetricsTableGenerator(
+    TableGenerator, generator_name="phasar-inc-cs-metrics", options=[]
+):
+
+    def generate(self) -> tp.List[Table]:
+        return [PhasarIncMetricsTable(self.table_config, **self.table_kwargs)]
diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py
index f9d275aeb..068d57bda 100644
--- a/varats/varats/tools/bb_config.py
+++ b/varats/varats/tools/bb_config.py
@@ -85,6 +85,7 @@ def create_new_bb_config(
         'varats.projects.cpp_projects.doxygen',
         'varats.projects.cpp_projects'
         '.two_libs_one_project_interaction_discrete_libs_single_project'
     ]
+
     if include_test_projects:
         projects_conf.value[:] += [
             'varats.projects.test_projects.basic_tests',
@@ -93,7 +94,8 @@
             'varats.projects.test_projects.linker_check',
             'varats.projects.test_projects.taint_tests',
             'varats.projects.test_projects.test_suite',
-            'varats.projects.perf_tests.feature_perf_cs_collection'
+
'varats.projects.perf_tests.feature_perf_cs_collection', + 'varats.projects.test_projects.commit_inc_scenarios' ] # Experiments for VaRA @@ -110,6 +112,7 @@ def create_new_bb_config( 'varats.experiments.vara.blame_verifier_experiment', 'varats.experiments.vara.phasar_fta', 'varats.experiments.phasar.ide_linear_constant_experiment', + 'varats.experiments.phasar.incremental_analysis', 'varats.experiments.phasar.global_analysis_compare', 'varats.experiments.szz.szz_unleashed_experiment', 'varats.experiments.szz.pydriller_szz_experiment', diff --git a/varats/varats/tools/driver_build_setup.py b/varats/varats/tools/driver_build_setup.py index 796346c96..7339a096b 100644 --- a/varats/varats/tools/driver_build_setup.py +++ b/varats/varats/tools/driver_build_setup.py @@ -258,7 +258,7 @@ def _build_in_container( install_prefix: tp.Optional[Path] = None ) -> None: vara_cfg()["container"]["research_tool"] = tool.name - image_name = f"{image_base.image_name}_{build_type.name}" + image_name = f"{image_base.image_name}_dev" if not install_prefix: install_prefix = Path( diff --git a/varats/varats/tools/research_tools/vara.py b/varats/varats/tools/research_tools/vara.py index e121e38d0..36dfb6f69 100644 --- a/varats/varats/tools/research_tools/vara.py +++ b/varats/varats/tools/research_tools/vara.py @@ -162,7 +162,9 @@ class VaRA(ResearchTool[VaRACodeBase]): }) def __init__(self, base_dir: Path) -> None: - super().__init__("VaRA", [BuildType.DEV], VaRACodeBase(base_dir)) + super().__init__( + "VaRA", [BuildType.DEV, BuildType.OPT], VaRACodeBase(base_dir) + ) vara_cfg()["vara"]["llvm_source_dir"] = str(base_dir) save_config()
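
Closing note: the `PrecisionComparisonBase` experiments combine the `pairwise` forward port from `varats.utils.util` with the `[::-step][:max]` slice in `compute_revisions_to_explore`. A self-contained sketch with fake revision strings, assuming `get_all_revisions_between` yields revisions oldest to newest:

```python
from itertools import tee
import typing as tp

T = tp.TypeVar('T')

def pairwise(iterable: tp.Iterable[T]) -> tp.Iterable[tp.Tuple[T, T]]:
    # same forward port as varats.utils.util.pairwise
    first_iter, second_iter = tee(iterable)
    next(second_iter, None)
    return zip(first_iter, second_iter)

revisions = [f"rev{i}" for i in range(10)]  # oldest -> newest

step_width, max_revisions = 2, 3
# pick every step_width-th revision, newest first, capped at max_revisions
explored = revisions[::-step_width][:max_revisions]
assert explored == ["rev9", "rev7", "rev5"]

# the experiment then analyses consecutive (base, next) pairs oldest-first,
# mirroring pairwise(reversed(revision_list)) in actions_for_project
for base, nxt in pairwise(reversed(explored)):
    print(f"Compare From: {base} -> {nxt}")
# Compare From: rev5 -> rev7
# Compare From: rev7 -> rev9
```
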