diff --git a/tests/utils/test_git_util.py b/tests/utils/test_git_util.py index a2dc084ec..d39efaa00 100644 --- a/tests/utils/test_git_util.py +++ b/tests/utils/test_git_util.py @@ -28,6 +28,7 @@ get_submodule_head, calc_code_churn_range, RepositoryAtCommit, + calc_surviving_lines, ) @@ -226,6 +227,24 @@ def test_contains_source_code_with(self) -> None: ) ) + def test_calc_surviving_lines(self): + lines = calc_surviving_lines( + "MutliMethodAuthorCoordination", + FullCommitHash("f2f294bdda48526915b5a018e7e91f9f80204269") + ) + self.assertEqual( + lines[FullCommitHash("28f1624bda75a0c2da961e2572f9eebc31998346")], 3 + ) + self.assertEqual( + lines[FullCommitHash("9209cff2d5b6cf9b7b39020b43081bd840347be2")], 4 + ) + self.assertEqual( + lines[FullCommitHash("ffb0fb502072846e081ac9f63f1eb86667197b95")], 3 + ) + self.assertEqual( + lines[FullCommitHash("f2f294bdda48526915b5a018e7e91f9f80204269")], 9 + ) + class TestChurnConfig(unittest.TestCase): """Test if ChurnConfig sets languages correctly.""" diff --git a/uicomponents/CaseStudyGeneration.ui b/uicomponents/CaseStudyGeneration.ui index 92b554b2c..6c97fa963 100644 --- a/uicomponents/CaseStudyGeneration.ui +++ b/uicomponents/CaseStudyGeneration.ui @@ -10,7 +10,7 @@ 0 0 760 - 443 + 491 @@ -150,6 +150,19 @@ + + + + + + + + + + Filter CaseStudy´ + + + @@ -358,8 +371,8 @@ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> <html><head><meta name="qrichtext" content="1" /><style type="text/css"> p, li { white-space: pre-wrap; } -</style></head><body style=" font-family:'Ubuntu'; font-size:11pt; font-weight:400; font-style:normal;"> -<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p></body></html> +</style></head><body style=" font-family:'Noto Sans'; font-size:10pt; font-weight:400; font-style:normal;"> +<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; 
margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; font-family:'Ubuntu'; font-size:11pt;"><br /></p></body></html> @@ -374,7 +387,7 @@ p, li { white-space: pre-wrap; } 0 0 760 - 22 + 34 diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index d70af5353..e0fe45b94 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -366,7 +366,7 @@ def report_spec(cls) -> ReportSpecification: return cls.REPORT_SPEC @classmethod - def file_belongs_to_experiment(cls, file_name: str) -> bool: + def file_belongs_to_experiment(cls, file_name: ReportFilename) -> bool: """ Checks if the file belongs to this experiment. @@ -377,7 +377,7 @@ def file_belongs_to_experiment(cls, file_name: str) -> bool: True, if the file belongs to this experiment type """ try: - other_short_hand = ReportFilename(file_name).experiment_shorthand + other_short_hand = file_name.experiment_shorthand return cls.shorthand() == other_short_hand except ValueError: return False diff --git a/varats-core/varats/mapping/author_map.py b/varats-core/varats/mapping/author_map.py index b64af1b76..a8e820564 100644 --- a/varats-core/varats/mapping/author_map.py +++ b/varats-core/varats/mapping/author_map.py @@ -31,7 +31,7 @@ def __eq__(self, other) -> bool: return False def __str__(self) -> str: - return f"{self.name} <{self.mail}>" + return f"{self.name} {self.mail}" def __repr__(self) -> str: return f"{self.name} <{self.mail}>; {self.names},{self.mail_addresses}" diff --git a/varats-core/varats/utils/git_util.py b/varats-core/varats/utils/git_util.py index 18cb6f714..373319e3c 100644 --- a/varats-core/varats/utils/git_util.py +++ b/varats-core/varats/utils/git_util.py @@ -476,7 +476,7 @@ class Language(Enum): value: tp.Set[str] # pylint: disable=invalid-name C = {"h", "c"} - CPP = {"h", "hxx", "hpp", "cxx", "cpp", "cc"} + CPP = {"h", "hxx", "hpp", "cxx", "cpp", "cc", "hh"} def 
__init__(self) -> None: self.__enabled_languages: tp.List[ChurnConfig.Language] = [] @@ -1135,7 +1135,7 @@ def branch_has_upstream( return tp.cast(bool, exit_code == 0) -class RepositoryAtCommit(): +class RepositoryAtCommit: """Context manager to work with a repository at a specific revision, without duplicating the repository.""" @@ -1143,7 +1143,9 @@ def __init__(self, project_name: str, revision: ShortCommitHash) -> None: self.__repo = pygit2.Repository( get_local_project_git_path(project_name) ) - self.__initial_head = self.__repo.head + + self.__initial_head: pygit2.Reference = self.__repo.head + print(self.__initial_head.name) self.__revision = self.__repo.get(revision.hash) def __enter__(self) -> Path: @@ -1155,4 +1157,59 @@ def __exit__( exc_value: tp.Optional[BaseException], exc_traceback: tp.Optional[TracebackType] ) -> None: - self.__repo.checkout(self.__initial_head) + self.__repo.checkout( + self.__initial_head, strategy=pygit2.GIT_CHECKOUT_FORCE + ) + + +def calc_surviving_lines(project_name: str, revision: tp.Optional[FullCommitHash] = None) -> \ +tp.Dict[FullCommitHash, int]: + """ + Get the surviving lines of older commits at a given revision. 
+ + Args: + project_name: project to analyze + revision: revision to analyze at + + returns: number of lines per prior commit + """ + churn_config = ChurnConfig.create_c_style_languages_config() + file_pattern = re.compile( + "|".join(churn_config.get_extensions_repr(r"^.*\.", r"$")) + ) + if revision is not None: + hash = revision.hash + else: + hash = "HEAD" + lines_per_revision: tp.Dict[FullCommitHash, int] = {} + repo = pygit2.Repository(get_local_project_git_path(project_name)) + + initial_head: pygit2.Reference = repo.head + repo_folder = get_local_project_git_path(project_name) + git(__get_git_path_arg(repo_folder), "checkout", "-f", hash) + files = git( + __get_git_path_arg(repo_folder), "ls-tree", "-r", "--name-only", hash + ).splitlines() + + for file in files: + if file_pattern.match(file): + lines = git( + __get_git_path_arg(repo_folder), "blame", "--root", "-l", + f"{file}" + ).splitlines() + for line in lines: + if line: + last_change = line[:FullCommitHash.hash_length()] + try: + last_change = FullCommitHash(last_change) + except ValueError: + continue + + if lines_per_revision.keys().__contains__(last_change): + lines_per_revision[ + last_change] = lines_per_revision[last_change] + 1 + else: + lines_per_revision[last_change] = 1 + + git(__get_git_path_arg(repo_folder), "checkout", initial_head.name) + return lines_per_revision diff --git a/varats/varats/data/databases/author_interactions_database.py b/varats/varats/data/databases/author_interactions_database.py new file mode 100644 index 000000000..cd9fe07e6 --- /dev/null +++ b/varats/varats/data/databases/author_interactions_database.py @@ -0,0 +1,156 @@ +import typing as tp + +import pandas as pd + +from varats.data.cache_helper import build_cached_report_table +from varats.data.databases.evaluationdatabase import EvaluationDatabase +from varats.data.reports.blame_report import ( + gen_base_to_inter_commit_repo_pair_mapping, +) +from varats.experiments.vara.blame_report_experiment import ( + 
BlameReportExperiment, +) +from varats.jupyterhelper.file import load_blame_report +from varats.mapping.author_map import Author, generate_author_map +from varats.mapping.commit_map import CommitMap +from varats.paper.case_study import CaseStudy +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.project.project_util import ( + get_local_project_git_path, + get_primary_project_source, +) +from varats.report.report import ReportFilepath +from varats.revision.revisions import ( + get_processed_revisions_files, + get_failed_revisions_files, +) +from varats.utils.git_util import ( + create_commit_lookup_helper, + UNCOMMITTED_COMMIT_HASH, + CommitRepoPair, +) + + +class AuthorInteractionsDatabase( + EvaluationDatabase, + cache_id="author_contribution_data_base", + column_types={ + "author_name": 'str', + "author_mail": 'str', + "internal_interactions": 'int32', + "external_interactions": 'int32' + } +): + """Provides access to internal and external interactions of authors.""" + + @classmethod + def _load_dataframe( + cls, project_name: str, commit_map: CommitMap, + case_study: tp.Optional[CaseStudy], **kwargs: tp.Dict[str, tp.Any] + ) -> pd.DataFrame: + + def create_dataframe_layout() -> pd.DataFrame: + df_layout = pd.DataFrame(columns=cls.COLUMNS) + df_layout = df_layout.astype(cls.COLUMN_TYPES) + return df_layout + + def create_data_frame_for_report( + report_path: ReportFilepath + ) -> tp.Tuple[pd.DataFrame, str, str]: + report = load_blame_report(report_path) + base_inter_c_repo_pair_mapping = \ + gen_base_to_inter_commit_repo_pair_mapping(report) + revision = report.head_commit + + def build_dataframe_row( + author: Author, internal_interactions: int, + external_interactions: int + ) -> tp.Dict[str, tp.Any]: + data_dict: tp.Dict[str, tp.Any] = { + 'revision': revision.hash, + 'time_id': commit_map.short_time_id(revision), + 'author_name': author.name, + 'author_mail': author.mail, + 'internal_interactions': internal_interactions, 
+ 'external_interactions': external_interactions + } + return data_dict + + result_data_dicts: tp.Dict[Author, tp.Dict[str, tp.Any]] = {} + amap = generate_author_map(project_name) + repo_name = get_primary_project_source(project_name).local + commit_lookup_helper = create_commit_lookup_helper(project_name) + for base_pair in base_inter_c_repo_pair_mapping: + if not base_pair.commit.repository_name.startswith(repo_name): + # Skip interactions with submodules + continue + inter_pair_dict = base_inter_c_repo_pair_mapping[base_pair] + if base_pair.commit.commit_hash == UNCOMMITTED_COMMIT_HASH: + continue + base_commit = commit_lookup_helper( + CommitRepoPair(base_pair.commit.commit_hash, repo_name) + ) + base_author = amap.get_author( + base_commit.author.name, base_commit.author.email + ) + if base_author is None: + amap.add_entry( + base_commit.author.name, base_commit.author.email + ) + base_author = amap.get_author( + base_commit.author.name, base_commit.author.email + ) + internal_interactions = 0 + external_interactions = 0 + for inter_pair, interactions in inter_pair_dict.items(): + if inter_pair.commit.commit_hash == UNCOMMITTED_COMMIT_HASH or not inter_pair.commit.repository_name.startswith( + repo_name + ): + continue + inter_commit = commit_lookup_helper( + CommitRepoPair( + inter_pair.commit.commit_hash, repo_name + ) + ) + inter_author = amap.get_author( + inter_commit.author.name, inter_commit.author.email + ) + if base_author == inter_author: + internal_interactions += interactions + else: + external_interactions += interactions + if base_author in result_data_dicts: + result_data_dicts[base_author]['internal_interactions' + ] += internal_interactions + result_data_dicts[base_author]['external_interactions' + ] += external_interactions + else: + result_data_dicts[base_author] = build_dataframe_row( + base_author, internal_interactions, + external_interactions + ) + + return pd.DataFrame( + list(result_data_dicts.values()) + ), report.head_commit.hash, 
str(report_path.stat().st_mtime_ns) + + report_files = get_processed_revisions_files( + project_name, + BlameReportExperiment, + file_name_filter=get_case_study_file_name_filter(case_study) + ) + + failed_report_files = get_failed_revisions_files( + project_name, + BlameReportExperiment, + file_name_filter=get_case_study_file_name_filter(case_study) + ) + + data_frame = build_cached_report_table( + cls.CACHE_ID, project_name, report_files, failed_report_files, + create_dataframe_layout, create_data_frame_for_report, + lambda path: path.report_filename.commit_hash.hash, + lambda path: str(path.stat().st_mtime_ns), + lambda a, b: int(a) > int(b) + ) + return data_frame diff --git a/varats/varats/data/databases/commit_interaction_aggregate_database.py b/varats/varats/data/databases/commit_interaction_aggregate_database.py new file mode 100644 index 000000000..6104a4aa8 --- /dev/null +++ b/varats/varats/data/databases/commit_interaction_aggregate_database.py @@ -0,0 +1,101 @@ +import typing as tp + +import pandas as pd + +from varats.data.cache_helper import build_cached_report_table +from varats.data.databases.evaluationdatabase import EvaluationDatabase +from varats.data.reports.blame_report import ( + gen_base_to_inter_commit_repo_pair_mapping, +) +from varats.experiments.vara.blame_report_experiment import ( + BlameReportExperiment, +) +from varats.jupyterhelper.file import load_blame_report +from varats.mapping.commit_map import CommitMap +from varats.paper.case_study import CaseStudy +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.report.report import ReportFilepath +from varats.revision.revisions import ( + get_processed_revisions_files, + get_failed_revisions_files, +) +from varats.utils.git_util import FullCommitHash + + +class SurvivingInteractionsDatabase( + EvaluationDatabase, + cache_id="survivng_interactions_data", + column_types={ + "base_hash": 'str', + "interactions": 'int32', + } +): + """Provides access to total 
interactions of commits.""" + + @classmethod + def _load_dataframe( + cls, project_name: str, commit_map: CommitMap, + case_study: tp.Optional[CaseStudy], **kwargs: tp.Dict[str, tp.Any] + ) -> pd.DataFrame: + + def create_dataframe_layout() -> pd.DataFrame: + df_layout = pd.DataFrame(columns=cls.COLUMNS) + df_layout = df_layout.astype(cls.COLUMN_TYPES) + return df_layout + + def create_data_frame_for_report( + report_path: ReportFilepath + ) -> tp.Tuple[pd.DataFrame, str, str]: + report = load_blame_report(report_path) + base_inter_c_repo_pair_mapping = \ + gen_base_to_inter_commit_repo_pair_mapping(report) + revision = report.head_commit + + def build_dataframe_row(chash: FullCommitHash, + interactions: int) -> tp.Dict[str, tp.Any]: + + data_dict: tp.Dict[str, tp.Any] = { + 'revision': revision.hash, + 'time_id': commit_map.short_time_id(revision), + 'base_hash': chash.hash, + 'interactions': interactions + } + return data_dict + + result_data_dicts: tp.List[tp.Dict[str, tp.Any]] = [] + + for base_pair in base_inter_c_repo_pair_mapping: + inter_pair_amount_dict = base_inter_c_repo_pair_mapping[ + base_pair] + interactions_amount = sum(inter_pair_amount_dict.values()) + result_data_dicts.append( + build_dataframe_row( + chash=base_pair.commit.commit_hash, + interactions=interactions_amount + ) + ) + return pd.DataFrame(result_data_dicts + ), report.head_commit.hash, str( + report_path.stat().st_mtime_ns + ) + + report_files = get_processed_revisions_files( + project_name, + BlameReportExperiment, + file_name_filter=get_case_study_file_name_filter(case_study) + ) + + failed_report_files = get_failed_revisions_files( + project_name, + BlameReportExperiment, + file_name_filter=get_case_study_file_name_filter(case_study) + ) + + data_frame = build_cached_report_table( + cls.CACHE_ID, project_name, report_files, failed_report_files, + create_dataframe_layout, create_data_frame_for_report, + lambda path: path.report_filename.commit_hash.hash, + lambda path: 
str(path.stat().st_mtime_ns), + lambda a, b: int(a) > int(b) + ) + return data_frame diff --git a/varats/varats/data/databases/survivng_lines_database.py b/varats/varats/data/databases/survivng_lines_database.py new file mode 100644 index 000000000..da2a68085 --- /dev/null +++ b/varats/varats/data/databases/survivng_lines_database.py @@ -0,0 +1,72 @@ +import typing as tp + +import pandas as pd +from pygit2._pygit2 import GIT_SORT_TOPOLOGICAL + +from varats.data.cache_helper import load_cached_df_or_none, cache_dataframe +from varats.data.databases.evaluationdatabase import EvaluationDatabase +from varats.mapping.commit_map import CommitMap +from varats.paper.case_study import CaseStudy +from varats.project.project_util import get_local_project_git +from varats.utils.git_util import ( + calc_surviving_lines, + FullCommitHash, + ShortCommitHash, +) + + +class SurvivingLinesDatabase( + EvaluationDatabase, + cache_id="survivng_lines_data", + column_types={ + "commit_hash": 'str', + "lines": 'int32' + } +): + + @classmethod + def _load_dataframe( + cls, project_name: str, commit_map: CommitMap, + case_study: tp.Optional[CaseStudy], **kwargs: tp.Dict[str, tp.Any] + ) -> pd.DataFrame: + data_frame = load_cached_df_or_none( + cls.CACHE_ID, project_name, cls.COLUMN_TYPES + ) + project_repo = get_local_project_git(case_study.project_name) + revisions = case_study.revisions if case_study else [ + FullCommitHash.from_pygit_commit(commit) for commit in + project_repo.walk(project_repo.head.target, GIT_SORT_TOPOLOGICAL) + ] + data_dicts: tp.List[tp.Dict[str, tp.Any]] = [] + cached_revisions = data_frame.groupby("revision").groups.keys( + ) if data_frame is not None else set() + revisions_to_compute: tp.Set[str] = set( + map(lambda r: r.hash, revisions) + ) - cached_revisions + + for revision in revisions_to_compute: + lines_per_commit = calc_surviving_lines( + case_study.project_name, ShortCommitHash(revision) + ) + + def build_dataframe_row(chash: FullCommitHash, + lines: int) 
-> tp.Dict[str, tp.Any]: + data_dict: tp.Dict[str, tp.Any] = { + 'revision': revision, + 'time_id': commit_map.time_id(FullCommitHash(revision)), + 'commit_hash': chash.hash, + 'lines': lines + } + return data_dict + + for entry in lines_per_commit.items(): + data_dicts.append(build_dataframe_row(entry[0], entry[1])) + if data_frame is None: + data_frame = pd.DataFrame(data_dicts) + else: + data_frame = pd.concat([data_frame, + pd.DataFrame(data_dicts)], + ignore_index=True, + copy=False) + cache_dataframe(cls.CACHE_ID, project_name, data_frame) + return data_frame diff --git a/varats/varats/data/reports/blame_interaction_graph.py b/varats/varats/data/reports/blame_interaction_graph.py index dc490b9b4..ece03b4f1 100644 --- a/varats/varats/data/reports/blame_interaction_graph.py +++ b/varats/varats/data/reports/blame_interaction_graph.py @@ -29,6 +29,7 @@ ChurnConfig, UNCOMMITTED_COMMIT_HASH, FullCommitHash, + CommitHash, get_submodule_head, ) @@ -85,6 +86,17 @@ class CAIGEdgeAttrs(TypedDict): amount: int +class FIGNodeAttrs(TypedDict): + """Funition interaction graph node attributes.""" + function: tp.Optional[str] + num_commits: int + + +class FIGEdgeAttrs(TypedDict): + """Function interaction graph edge attributes.""" + amount: int + + class InteractionGraph(abc.ABC): """Graph/Network built from interaction data.""" @@ -299,6 +311,64 @@ def commit_author_interaction_graph( ]["amount"] += data["amount"] return caig + def function_interaction_graph(self): + """ + Return a digraph with functions as nodes and interactions as edges. + + Nodes can be referenced via their function name. 
+ The graph has the following attributes: + Nodes: + - function: name of the function + - num_commits: number of commits aggregated in this node + Edges: + - amount: how often an interaction between two functions was found + + Returns: + the author interaction graph + """ + interaction_graph = self._interaction_graph() + + def partition(node_u: BIGNodeTy, node_v: BIGNodeTy): + return node_u.function_name == node_v.function_name + + def edge_data( + partition_a: tp.Set[BIGNodeTy], partition_b: tp.Set[BIGNodeTy] + ) -> FIGEdgeAttrs: + amount = 0 + interactions: tp.List[tp.Tuple[CommitRepoPair, CommitRepoPair]] = [] + for source in partition_a: + for sink in partition_b: + if interaction_graph.has_edge(source, sink): + amount += int(interaction_graph[source][sink]["amount"]) + interactions.append((source.commit, sink.commit)) + + return {"amount": amount} + + def node_data(nodes: tp.Set[BIGNodeTy]) -> FIGNodeAttrs: + functions = { + node.function_name if node.function_name else "Unknown" + for node in nodes + } + assert len(functions) == 1, "Some node has more then one function." 
+ return { + "function": next(iter(functions)), + "num_commits": len(nodes) + } + + fig = nx.quotient_graph( + interaction_graph, + partition=partition, + edge_data=edge_data, + node_data=node_data, + create_using=nx.DiGraph + ) + relabel_dict: tp.Dict[tp.FrozenSet[BIGNodeTy], str] = {} + for node in fig.nodes: + relabel_dict[node] = tp.cast(AIGNodeAttrs, + fig.nodes[node])["function"] + nx.relabel_nodes(fig, relabel_dict, copy=False) + return fig + class BlameInteractionGraph(InteractionGraph): """Graph/Network built from blame interaction data.""" diff --git a/varats/varats/experiments/vara/blame_report_experiment.py b/varats/varats/experiments/vara/blame_report_experiment.py index 747e8d313..d0da10fc3 100644 --- a/varats/varats/experiments/vara/blame_report_experiment.py +++ b/varats/varats/experiments/vara/blame_report_experiment.py @@ -27,7 +27,7 @@ from varats.experiment.wllvm import get_cached_bc_file_path, BCFileExtensions from varats.project.project_util import get_local_project_git_paths from varats.project.varats_project import VProject -from varats.report.report import ReportSpecification +from varats.report.report import ReportSpecification, ReportFilename class BlameReportGeneration(actions.ProjectStep): # type: ignore @@ -147,6 +147,10 @@ def actions_for_project( return analysis_actions + @classmethod + def file_belongs_to_experiment(cls, file_name: ReportFilename) -> bool: + return file_name.experiment_shorthand in ["BRE", "BRER", "BRECIF"] + class BlameReportExperimentRegion(BlameReportExperiment, shorthand="BRER"): """Generates a blame report with region scoped taints.""" diff --git a/varats/varats/experiments/vara/feature_perf_runner.py b/varats/varats/experiments/vara/feature_perf_runner.py index ad6c3b424..851aa05a0 100644 --- a/varats/varats/experiments/vara/feature_perf_runner.py +++ b/varats/varats/experiments/vara/feature_perf_runner.py @@ -20,6 +20,7 @@ from varats.report.tef_report import TEFReport + class 
FeaturePerfRunner(FeatureExperiment, shorthand="FPR"): """Test runner for feature performance.""" diff --git a/varats/varats/gui/cs_gen/case_study_generation.py b/varats/varats/gui/cs_gen/case_study_generation.py index 7783f0132..efa83330d 100644 --- a/varats/varats/gui/cs_gen/case_study_generation.py +++ b/varats/varats/gui/cs_gen/case_study_generation.py @@ -7,6 +7,8 @@ import benchbuild as bb import pygit2 +from benchbuild import Experiment +from benchbuild.experiment import ExperimentRegistry from PyQt5.QtCore import ( QModelIndex, QDateTime, @@ -17,7 +19,10 @@ from PyQt5.QtGui import QColor from PyQt5.QtWidgets import QMainWindow, QApplication, QMessageBox +import varats.paper.paper_config as PC from varats.base.sampling_method import NormalSamplingMethod +from varats.data.databases.file_status_database import FileStatusDatabase +from varats.experiments.discover_experiments import initialize_experiments from varats.gui.cs_gen.case_study_generation_ui import Ui_MainWindow from varats.mapping.commit_map import get_commit_map, CommitMap from varats.paper.case_study import CaseStudy, store_case_study @@ -33,8 +38,10 @@ get_primary_project_source, ) from varats.projects.discover_projects import initialize_projects +from varats.report.report import FileStatusExtension from varats.revision.revisions import is_revision_blocked from varats.tools.research_tools.vara_manager import ProcessManager +from varats.ts_utils.click_param_types import is_experiment_excluded from varats.utils import settings from varats.utils.git_util import ( get_initial_commit, @@ -95,7 +102,16 @@ def __init__(self): self.commit_search.textChanged.connect( self.proxy_model.setFilterFixedString ) + self.cs_filter.stateChanged.connect(self.proxy_model.setCsFilter) + self.case_study.currentIndexChanged.connect( + self.proxy_model.update_case_study + ) self.show() + initialize_experiments() + self.experiment.addItems([ + k for k, v in ExperimentRegistry.experiments.items() + if not 
is_experiment_excluded(k) + ]) def update_project_list(self, filter_string: str = "") -> None: """Update the project list when a filter is applied.""" @@ -202,6 +218,7 @@ def revisions_of_project(self) -> None: GenerationStrategy.SELECT_REVISION.value ) if self.selected_project != self.revision_list_project: + self.case_study.clear() self.revision_details.setText("Loading Revisions") self.revision_details.repaint() # Update the local project git @@ -224,8 +241,22 @@ def revisions_of_project(self) -> None: cmap = get_commit_map(self.selected_project) commit_model = CommitTableModel( - list(map(commit_lookup_helper, commits)), cmap, project + list(map(commit_lookup_helper, commits)), cmap, project, + ExperimentRegistry.experiments[self.experiment.currentText()] + ) + self.proxy_model.setProject(project) + self.case_study.currentIndexChanged.connect( + commit_model.update_case_study ) + self.experiment.currentTextChanged.connect( + commit_model.update_experiment + ) + current_config = PC.get_paper_config() + case_studies = current_config.get_all_case_studies() + self.case_study.addItems([ + f"{cs.project_name}_{cs.version}" for cs in case_studies + if cs.project_name == self.selected_project + ]) self.proxy_model.setSourceModel(commit_model) self.revision_list_project = self.selected_project self.revision_details.clear() @@ -246,22 +277,39 @@ def show_revision_data(self, index: QModelIndex) -> None: class CommitTableFilterModel(QSortFilterProxyModel): """Filter Model for the revision table.""" filter_string = "" + cs_filter = False def setFilterFixedString(self, pattern: str) -> None: self.filter_string = pattern self.invalidate() + def update_case_study(self, index: int) -> None: + current_config = PC.get_paper_config() + case_studies = [ + cs for cs in current_config.get_all_case_studies() + if cs.project_name == self._project.NAME + ] + self._case_study = case_studies[index] + self.invalidate() + + def setProject(self, project: tp.Type['bb.Project']) -> None: + 
self._project = project + + def setCsFilter(self, cs_filter: bool) -> None: + self.cs_filter = cs_filter + self.invalidate() + def filterAcceptsRow( self, source_row: int, source_parent: QModelIndex ) -> bool: commit_index = self.sourceModel().index(source_row, 0, source_parent) author_index = self.sourceModel().index(source_row, 1, source_parent) - return self.sourceModel().data(commit_index, + return ((not self.cs_filter) or FullCommitHash(self.sourceModel().data(commit_index,Qt.WhatsThisRole).hex) in self._case_study.revisions) and (self.sourceModel().data(commit_index, Qt.DisplayRole).lower() \ .__contains__(self.filter_string.lower()) \ or self.sourceModel().data(author_index, Qt.DisplayRole).lower() \ - .__contains__(self.filter_string.lower()) + .__contains__(self.filter_string.lower())) class CommitTableModel(QAbstractTableModel): @@ -270,13 +318,51 @@ class CommitTableModel(QAbstractTableModel): def __init__( self, data: tp.List[pygit2.Commit], cmap: CommitMap, - project: tp.Type['bb.Project'] + project: tp.Type['bb.Project'], experiment_type: tp.Type[Experiment] ): super().__init__() self._project = project self._data = data + self._case_study: tp.Optional[CaseStudy] = None + self._experiment_type = experiment_type self._cmap = cmap + def update_case_study(self, index: int) -> None: + current_config = PC.get_paper_config() + case_studies = [ + cs for cs in current_config.get_all_case_studies() + if cs.project_name == self._project.NAME + ] + self._case_study = case_studies[index] + if self._experiment_type: + self._status_data = FileStatusDatabase.get_data_for_project( + self._case_study.project_name, ["revision", "file_status"], + self._cmap, + self._case_study, + experiment_type=self._experiment_type, + tag_blocked=False + ) + self._status_data.set_index("revision", inplace=True) + self.dataChanged.emit( + self.index(0, 0), self.index(self.rowCount(), self.columnCount()) + ) + + def update_experiment(self, index: str) -> None: + self._experiment_type = 
ExperimentRegistry.experiments[index] + if self._case_study: + self._status_data = FileStatusDatabase.get_data_for_project( + self._case_study.project_name, ["revision", "file_status"], + self._cmap, + self._case_study, + experiment_type=self._experiment_type, + tag_blocked=False + ) + self._status_data.set_index("revision", inplace=True) + + self.dataChanged.emit( + self.index(0, 0), self.index(self.rowCount(), self.columnCount()) + ) + def headerData(self, section, orientation, role=Qt.DisplayRole): if role == Qt.DisplayRole and orientation == Qt.Horizontal: return self.header_labels[section] @@ -311,6 +397,28 @@ def data(self, index: QModelIndex, role: int = Qt.DisplayRole) -> tp.Any: return QColor(50, 100, 255) if role == Qt.ToolTipRole: return "Blocked" + if self._case_study and self._experiment_type: + if role == Qt.ForegroundRole: + chash = ShortCommitHash(commit.hex) + if chash in self._status_data.index: + if self._status_data.loc[ + chash, "file_status" + ] == FileStatusExtension.SUCCESS.get_status_extension(): + return QColor(0, 255, 0) + elif self._status_data.loc[ + chash, "file_status" + ] == FileStatusExtension.FAILED.get_status_extension(): + return QColor(255, 0, 0) + elif self._status_data.loc[ + chash, "file_status" + ] == FileStatusExtension.COMPILE_ERROR.get_status_extension( + ): + return QColor(255, 0, 0) + elif self._status_data.loc[ + chash, "file_status" + ] == FileStatusExtension.MISSING.get_status_extension(): + return QColor(255, 255, 0) + if role == Qt.WhatsThisRole: return commit diff --git a/varats/varats/gui/cs_gen/case_study_generation_ui.py b/varats/varats/gui/cs_gen/case_study_generation_ui.py index 8b027219e..100bdfa72 100644 --- a/varats/varats/gui/cs_gen/case_study_generation_ui.py +++ b/varats/varats/gui/cs_gen/case_study_generation_ui.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Form implementation generated from reading ui file '../uicomponents/CaseStudyGeneration.ui' +# Form implementation generated from reading ui file 
'VaRA-Tool-Suite/uicomponents/CaseStudyGeneration.ui' # # Created by: PyQt5 UI code generator 5.13.2 # @@ -14,7 +14,7 @@ class Ui_MainWindow(object): def setupUi(self, MainWindow): MainWindow.setObjectName("MainWindow") MainWindow.setEnabled(True) - MainWindow.resize(760, 443) + MainWindow.resize(760, 491) sizePolicy = QtWidgets.QSizePolicy( QtWidgets.QSizePolicy.MinimumExpanding, QtWidgets.QSizePolicy.MinimumExpanding @@ -87,6 +87,15 @@ def setupUi(self, MainWindow): self.revisions.setSizePolicy(sizePolicy) self.revisions.setObjectName("revisions") self.verticalLayout_3.addWidget(self.revisions) + self.case_study = QtWidgets.QComboBox(self.revisionsPage) + self.case_study.setObjectName("case_study") + self.verticalLayout_3.addWidget(self.case_study) + self.experiment = QtWidgets.QComboBox(self.revisionsPage) + self.experiment.setObjectName("experiment") + self.verticalLayout_3.addWidget(self.experiment) + self.cs_filter = QtWidgets.QCheckBox(self.revisionsPage) + self.cs_filter.setObjectName("cs_filter") + self.verticalLayout_3.addWidget(self.cs_filter) self.commit_search = QtWidgets.QLineEdit(self.revisionsPage) self.commit_search.setClearButtonEnabled(True) self.commit_search.setObjectName("commit_search") @@ -206,7 +215,7 @@ def setupUi(self, MainWindow): self.gridLayout.addWidget(self.projects, 0, 0, 1, 1) MainWindow.setCentralWidget(self.centralwidget) self.menubar = QtWidgets.QMenuBar(MainWindow) - self.menubar.setGeometry(QtCore.QRect(0, 0, 760, 22)) + self.menubar.setGeometry(QtCore.QRect(0, 0, 760, 34)) self.menubar.setObjectName("menubar") MainWindow.setMenuBar(self.menubar) self.statusbar = QtWidgets.QStatusBar(MainWindow) @@ -229,6 +238,7 @@ def retranslateUi(self, MainWindow): self.label_2.setText(_translate("MainWindow", "Casestudy Version")) self.generate.setText(_translate("MainWindow", "Generate")) self.revisions.setText(_translate("MainWindow", "Revisions")) + self.cs_filter.setText(_translate("MainWindow", "Filter CaseStudy´")) 
self.commit_search.setPlaceholderText( _translate("MainWindow", "Search") ) @@ -253,8 +263,8 @@ def retranslateUi(self, MainWindow): "\n" "\n" - "


" + "\n" + "


" ) ) diff --git a/varats/varats/plots/author_contribution_survival.py b/varats/varats/plots/author_contribution_survival.py new file mode 100644 index 000000000..3516abb92 --- /dev/null +++ b/varats/varats/plots/author_contribution_survival.py @@ -0,0 +1,262 @@ +"""Plot for author contributions over time.""" +import math +import typing as tp + +from matplotlib import pyplot as plt +from matplotlib import style +from pandas import DataFrame + +from varats.data.databases.author_interactions_database import ( + AuthorInteractionsDatabase, +) +from varats.data.databases.survivng_lines_database import SurvivingLinesDatabase +from varats.mapping.author_map import generate_author_map, Author +from varats.mapping.commit_map import get_commit_map +from varats.paper.case_study import CaseStudy +from varats.plot.plot import Plot +from varats.plot.plots import PlotConfig, PlotGenerator +from varats.plots.surviving_commits import HeatMapPlot +from varats.project.project_util import ( + get_primary_project_source, + get_local_project_git_path, +) +from varats.ts_utils.cli_util import make_cli_option +from varats.ts_utils.click_param_types import REQUIRE_MULTI_CASE_STUDY +from varats.utils.git_util import ( + FullCommitHash, + create_commit_lookup_helper, + UNCOMMITTED_COMMIT_HASH, + CommitRepoPair, +) + + +def _group_data_by_author( + project_name: str, data: DataFrame, sample_points_label: str, + data_points_label: str, value_label: str +) -> DataFrame: + commit_lookup_helper = create_commit_lookup_helper(project_name) + repo = get_primary_project_source(project_name).local + amap = generate_author_map(project_name) + + def author_data(commit_hash: str) -> tp.Optional[Author]: + if commit_hash == UNCOMMITTED_COMMIT_HASH.hash: + return None + commit = commit_lookup_helper( + CommitRepoPair(FullCommitHash(commit_hash), str(repo)) + ) + return amap.get_author(commit.author.name, commit.author.email) + + data = data.apply( + lambda x: [ + x[sample_points_label], + 
author_data(x[data_points_label]), x[value_label] + ], + axis=1, + result_type='broadcast' + ) + data = data.rename(columns={data_points_label: 'author'}) + return data.groupby(by=[sample_points_label, 'author'], + sort=False).sum(min_count=1).reset_index() + + +def get_interactions_per_author(case_study: CaseStudy) -> DataFrame: + """Returns a DataFrame with the number of interactions per author per + revision.""" + project_name = case_study.project_name + amap = generate_author_map(project_name) + data: DataFrame = AuthorInteractionsDatabase().get_data_for_project( + project_name, [ + "revision", "author_name", "internal_interactions", + "external_interactions" + ], get_commit_map(project_name), case_study + ) + data["author"] = data["author_name"].apply(amap.get_author_by_name) + data.drop(columns=["author_name"], inplace=True) + data["interactions" + ] = data["internal_interactions"] + data["external_interactions"] + return data + + +def get_lines_per_author(case_study: CaseStudy): + """Returns a DataFrame with the number of lines per author per revision.""" + project_name = case_study.project_name + data = SurvivingLinesDatabase.get_data_for_project( + project_name, ["revision", "commit_hash", "lines"], + get_commit_map(project_name), case_study + ) + return _group_data_by_author( + project_name, data, 'revision', 'commit_hash', 'lines' + ) + + +def author_interactions_normalized_per_revision( + case_study: CaseStudy, limit: int = 0 +) -> DataFrame: + """Returns a DataFrame with the number of interactions per author per + revision normalized by the total number of interactions per revision.""" + data: DataFrame = get_interactions_per_author(case_study) + ref_data = data.groupby(by=['revision'], + sort=False).interactions.sum(min_count=1) + cmap = get_commit_map(case_study.project_name) + data = data.apply( + lambda x: [ + cmap.short_time_id(x['revision']), + ( + x['internal_interactions'] / x['interactions'] + if not math.isnan(x['interactions']) else math.nan 
+ ), + ( + x['external_interactions'] / x['interactions'] + if not math.isnan(x['interactions']) else math.nan + ), + x['author'], + ( + x['interactions'] * 100 / ref_data[x['revision']] + if not math.isnan(x['interactions']) else math.nan + ), + ], + axis=1, + result_type='broadcast' + ) + data.loc[data['interactions'] < limit, 'author'] = Author(-1, "other", "") + data = data.groupby(by=['revision', 'author'], + sort=False).sum(min_count=1).reset_index() + return data.pivot( + index="author", columns='revision', values='interactions' + ).astype(float) + + +def lines_per_author_normalized_per_revision( + case_study: CaseStudy, min_lines: int = 0 +): + """Returns a DataFrame with the number of lines per author per revision + normalized by the total number of lines per revision.""" + data = get_lines_per_author(case_study) + ref_data = data.groupby(by=['revision'], sort=False).lines.sum(min_count=1) + cmap = get_commit_map(case_study.project_name) + data = data.apply( + lambda x: [ + cmap.short_time_id(x['revision']), x['author'], + (x['lines'] * 100 / ref_data[x['revision']]) + if not math.isnan(x['lines']) else math.nan + ], + axis=1, + result_type='broadcast' + ) + data.loc[data['lines'] < min_lines, 'author'] = Author(-1, "other", "") + data = data.groupby(by=['revision', 'author'], + sort=False).lines.sum(min_count=1).reset_index() + return data.pivot(index="author", columns='revision', + values='lines').astype(float) + + +class ContributionPlot(Plot, plot_name=None): + """Base class for contribution plots.""" + + def __init__( + self, plot_config: PlotConfig, data_function, **kwargs: tp.Any + ): + super().__init__(plot_config, **kwargs) + self.data_column = [] + self.data_function = data_function + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + def plot(self, view: bool) -> None: + """Plots the contribution plot.""" + style.use(self.plot_config.get_dict()) + _, axis = plt.subplots(len(self.data_column), 
1, sharex="col") + case_study = self.plot_kwargs['case_study'] + data = self.data_function(case_study, self.plot_kwargs["threshold"]) + data.sort_index( + axis=0, + level=0, + inplace=True, + key=lambda x: x.map(lambda y: y.author_id) + ) + data.fillna(0, inplace=True) + plt.rcParams.update({"text.usetex": True, "font.family": "Helvetica"}) + if len(self.data_column) > 1: + for i, column in enumerate(self.data_column): + plotdata = data.xs(column, level=1) + plotdata.T.plot.area( + ax=axis[i], ylabel=column, stacked=True, xticks=[] + ) + axis[i].set_ylabel(column.capitalize()) + axis[i].get_legend().remove() + else: + data.T.plot.area(ax=axis, stacked=True) + plt.xlabel("Revision") + plt.legend(fontsize=8, loc=2, bbox_to_anchor=(1, 2)) + + +class AuthorLineContribution( + ContributionPlot, plot_name="author_line_contribution" +): + """Contribution Plot for lines of authors.""" + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + NAME = "author_line_contribution" + + def __init__(self, plot_config: PlotConfig, **kwargs: tp.Any): + super().__init__( + plot_config, lines_per_author_normalized_per_revision, **kwargs + ) + self.data_column = "lines" + + +class AuthorInteractionsContribution( + ContributionPlot, plot_name="author_interactions_contribution" +): + """Contribution Plot for interactions of authors.""" + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + NAME = "author_interactions_contribution" + + def __init__(self, plot_config: PlotConfig, **kwargs: tp.Any): + super().__init__( + plot_config, author_interactions_normalized_per_revision, **kwargs + ) + self.data_column = "interactions" + + +class AuthorContributionPlotGenerator( + PlotGenerator, + generator_name="author-contribution", + options=[ + REQUIRE_MULTI_CASE_STUDY, + make_cli_option( + "--threshold", + default=10, + help="Threshold of contribution to group authors with less", + type=int + ) + 
] +): + """Generates contribution plots.""" + + def name_addition(self, i: int) -> str: + return self.plot_kwargs["case_study"][i].project_name + + def generate(self) -> tp.List['varats.plot.plot.Plot']: + case_studys: tp.List[CaseStudy] = self.plot_kwargs["case_study"] + plots: tp.List[Plot] = [] + for case_study in case_studys: + kwargs = self.plot_kwargs.copy() + kwargs["case_study"] = case_study + plots.append(AuthorLineContribution(self.plot_config, **kwargs)) + plots.append( + AuthorInteractionsContribution(self.plot_config, **kwargs) + ) + return plots diff --git a/varats/varats/plots/commit_trend.py b/varats/varats/plots/commit_trend.py new file mode 100644 index 000000000..1db9a5e37 --- /dev/null +++ b/varats/varats/plots/commit_trend.py @@ -0,0 +1,162 @@ +import typing as tp + +import matplotlib.pyplot as plt +import seaborn as sns +from matplotlib.colors import SymLogNorm +from pandas import DataFrame + +from varats.mapping.commit_map import get_commit_map +from varats.paper.case_study import CaseStudy +from varats.plot.plot import Plot +from varats.plot.plots import PlotConfig, PlotGenerator +from varats.plots.surviving_commits import ( + get_lines_per_commit_long, + get_interactions_per_commit_long, +) +from varats.ts_utils.click_param_types import REQUIRE_MULTI_CASE_STUDY +from varats.utils.git_util import ( + FullCommitHash, + ShortCommitHash, + UNCOMMITTED_COMMIT_HASH, +) + + +class ChangesHeatMap(Plot, plot_name=None): + """plot trendlines.""" + + def plot(self, view_mode: bool) -> None: + case_study = self.plot_kwargs['case_study'] + cs_data = self.data_function(case_study, False) + cs_data["interactions_diff"] = \ + cs_data.groupby(self.columns_label, sort=False)[ + self.value_label].diff().astype(float) + if self.value_label == "lines": + vmin = cs_data["interactions_diff"].min() + vmax = 0 + else: + vmax = max( + cs_data["interactions_diff"].max(), + -cs_data["interactions_diff"].min() + ) + vmin = -vmax + cs_data.drop( + 
cs_data[cs_data[self.columns_label] == + UNCOMMITTED_COMMIT_HASH.to_short_commit_hash()].index, + inplace=True + ) + + cs_data = cs_data.pivot( + index=self.columns_label, + columns="revision", + values="interactions_diff" + ) + cmap = get_commit_map(case_study.project_name) + if self.columns_label == "base_hash": + cs_data.sort_index( + key=lambda x: x.map(cmap.short_time_id), inplace=True + ) + plt.rcParams.update({"text.usetex": True, "font.family": "Helvetica"}) + axis = sns.heatmap( + cs_data, + center=0, + cmap="RdYlGn", + vmax=vmax, + vmin=vmin, + norm=SymLogNorm(linthresh=0.01, vmax=vmax, vmin=vmin) + ) + plt.setp( + axis.get_yticklabels(), + family='monospace', + ) + new_labels = [ + f"\\texttt{{{i.get_text()[0:5]}}}" + if len(i.get_text()) > 5 else i.get_text() + for i in axis.yaxis.get_ticklabels() + ] + axis.set_yticklabels(new_labels) + axis.set_ylabel("Commits") + axis.set_xlabel("Revisions") + axis.set_xticklabels([]) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + def __init__( + self, + plot_config: PlotConfig, + data_function, + columns_label="base_hash", + value_label="interactions", + **kwargs + ): + super().__init__(plot_config, **kwargs) + self.color_commits = False + self.data_function = data_function + self.columns_label = columns_label + self.value_label = value_label + + +def interactions_and_lines_per_commit_wrapper( + case_study: CaseStudy, cs_filter=True +): + print(f"Getting Lines per commit for {case_study.project_name}") + lines: DataFrame = get_lines_per_commit_long(case_study, cs_filter) + print("Getting Interactions") + interactions: DataFrame = get_interactions_per_commit_long( + case_study, cs_filter + ) + print("Merging") + data = lines.merge(interactions, how='right', on=["base_hash", "revision"]) + data.dropna( + axis=0, how='any', inplace=True, subset=["lines", "interactions"] + ) + cmap = get_commit_map(case_study.project_name) + data = data.apply( + lambda x: ( + 
cmap.short_time_id(x["revision"]), ShortCommitHash(x["base_hash"]), + x["lines"], x["interactions"] + ), + axis=1, + result_type="broadcast" + ) + return data.sort_values(by="revision") + + +class InteractionChangeHeatmap( + ChangesHeatMap, plot_name="interactions-change-heatmap" +): + + def __init__(self, plot_config: PlotConfig, **kwargs): + super().__init__( + plot_config, interactions_and_lines_per_commit_wrapper, **kwargs + ) + + +class LineChangeHeatmap(ChangesHeatMap, plot_name="line-change-heatmap"): + + def __init__(self, plot_config: PlotConfig, **kwargs): + super().__init__( + plot_config, + interactions_and_lines_per_commit_wrapper, + value_label="lines", + **kwargs + ) + + +class ChangesMapGenerator( + PlotGenerator, + generator_name="change-map", + options=[REQUIRE_MULTI_CASE_STUDY] +): + + def generate(self) -> tp.List['Plot']: + case_studys: tp.List[CaseStudy] = self.plot_kwargs["case_study"] + plots: tp.List[Plot] = [] + for case_study in case_studys: + kwargs = self.plot_kwargs.copy() + kwargs["case_study"] = case_study + plots.append(InteractionChangeHeatmap(self.plot_config, **kwargs)) + plots.append(LineChangeHeatmap(self.plot_config, **kwargs)) + return plots diff --git a/varats/varats/plots/interactions_change_distribution.py b/varats/varats/plots/interactions_change_distribution.py new file mode 100644 index 000000000..4fc13efa8 --- /dev/null +++ b/varats/varats/plots/interactions_change_distribution.py @@ -0,0 +1,127 @@ +"""Plot the distributions of change in interactions and lines with commits as +units of space.""" +import typing as tp + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns + +from varats.paper.case_study import CaseStudy +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator +from varats.plots.commit_trend import ( + lines_per_interactions_squashed, + lines_per_interactions_author, +) +from varats.ts_utils.click_param_types import REQUIRE_MULTI_CASE_STUDY +from 
varats.utils.git_util import FullCommitHash + + +class InteractionChangeDistribution( + Plot, plot_name="interactions_change_distribution" +): + """Plot the distributions of change in interactions of commits.""" + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + def plot(self, view_mode: bool) -> None: + case_studys: tp.List[CaseStudy] = self.plot_kwargs["case_study"] + data = pd.DataFrame({ + "base_hash": [], + "interactions_diff": [], + "project": [] + }) + plt.rcParams.update({"text.usetex": True, "font.family": "Helvetica"}) + for case_study in case_studys: + cs_data = lines_per_interactions_squashed(case_study, True) + cs_data["interactions_diff"] = cs_data.groupby( + "base_hash", sort=False + )["interactions"].diff().astype(float) + cs_data.insert(2, "project", case_study.project_name) + data = pd.concat([data, cs_data], + ignore_index=True, + copy=False, + join="inner") + data_sub = data.groupby(["base_hash", "project"], + sort=False)["interactions_diff"].sum() + + df = data_sub.to_frame().reset_index() + df["interactions_diff"] = df["interactions_diff"].apply(lambda x: x + 1) + df.sort_values(by=["project"], inplace=True) + df["project"] = df["project"].apply(lambda x: f"\\textsc{{{x}}}") + axis = sns.violinplot( + data=df, + y="interactions_diff", + x="project", + bw=0.15, + scale="width", + inner=None, + cut=0, + ) + axis.plot(range(len(case_studys)), [1 for _ in case_studys], "--k") + + axis.set_ylabel( + "Change in $\\frac{interactions}{lines}$", + fontsize=self.plot_config.font_size() + ) + axis.set_xlabel("Projects", fontsize=self.plot_config.font_size()) + plt.gcf().set_size_inches(10, 5) + plt.yscale("asinh") + + +class InteractionChangeAuthorDistribution( + Plot, plot_name="interactions_change_distribution" +): + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + def plot(self, view_mode: bool) -> None: + case_studys: tp.List[CaseStudy] = 
self.plot_kwargs["case_study"] + data = pd.DataFrame({ + "author": [], + "interactions_diff": [], + "project": [] + }) + plt.rcParams.update({"text.usetex": True, "font.family": "Helvetica"}) + for case_study in case_studys: + cs_data = lines_per_interactions_author(case_study) + cs_data["interactions_diff"] = cs_data.groupby( + "author", sort=False + )["interactions"].diff().astype(float) + cs_data.insert( + 2, "project", f"\\textsc{{{case_study.project_name}}}" + ) + data = pd.concat([data, cs_data], + ignore_index=True, + copy=False, + join="inner") + data_sub = data.groupby(["author", "project"], + sort=False)["interactions_diff"].sum() + df = data_sub.to_frame().reset_index() + axis = sns.violinplot( + data=df, y="interactions_diff", x="project", bw=0.1, scale="width" + ) + plt.scale('asinh') + axis.set_ylabel("Change in \\frac{interactions}{line}") + axis.set_xlabel("Projects") + + +class InteractionChangeDistributionGenerator( + PlotGenerator, + generator_name="change-distribution", + options=[REQUIRE_MULTI_CASE_STUDY] +): + + def generate(self) -> tp.List['varats.plot.plot.Plot']: + return [ + InteractionChangeDistribution(self.plot_config, **self.plot_kwargs), + InteractionChangeAuthorDistribution( + self.plot_config, **self.plot_kwargs + ) + ] diff --git a/varats/varats/plots/project_evolution.py b/varats/varats/plots/project_evolution.py new file mode 100644 index 000000000..eb5275be4 --- /dev/null +++ b/varats/varats/plots/project_evolution.py @@ -0,0 +1,81 @@ +import typing as tp + +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt +import pandas as pd +from pandas import DataFrame + +from varats.data.databases.blame_library_interactions_database import ( + BlameLibraryInteractionsDatabase, +) +from varats.data.databases.survivng_lines_database import SurvivingLinesDatabase +from varats.mapping.commit_map import get_commit_map +from varats.plot.plot import Plot +from varats.plot.plots import PlotConfig, PlotGenerator +from 
varats.ts_utils.click_param_types import REQUIRE_CASE_STUDY +from varats.utils.git_util import FullCommitHash + + +class ProjectEvolutionPlot(Plot, plot_name='project_evolution'): + + NAME = 'project-evolution' + + def plot(self, view_mode: bool) -> None: + case_study = self.plot_kwargs['case_study'] + project_name = case_study.project_name + lines: DataFrame = SurvivingLinesDatabase.get_data_for_project( + project_name, ["revision", "commit_hash", "lines"], + get_commit_map(project_name), case_study + ).rename(columns={'commit_hash': 'base_hash'}) + + interactions: DataFrame = BlameLibraryInteractionsDatabase( + ).get_data_for_project( + project_name, ["base_hash", "amount", "revision", "base_lib"], + get_commit_map(project_name), case_study + ).rename(columns={'amount': 'interactions'}) + data = lines.merge( + interactions, how='left', on=["base_hash", "revision"] + ) + data.drop(['base_hash'], inplace=True, axis='columns') + df: pd.DataFrame = data.groupby(by=['revision'], sort=False).sum() + df.reset_index(inplace=True) + print(df) + _, axis = plt.subplots(1, 1) + plt.setp( + axis.get_xticklabels(), fontsize=self.plot_config.x_tick_size() + ) + plt.setp( + axis.get_yticklabels(), fontsize=self.plot_config.x_tick_size() + ) + ax = axis.twinx() + plt.setp(ax.get_yticklabels(), fontsize=self.plot_config.x_tick_size()) + x_axis = range(len(df)) + ax.scatter(x_axis, df['lines'], color="green") + axis.scatter(x_axis, df['interactions'], color="orange") + ax.set_ylim(ymin=0) + axis.set_ylim(ymin=0) + lines_legend = mpatches.Patch(color='green', label="Lines") + interactions_legend = mpatches.Patch( + color="orange", label='Interactions' + ) + plt.legend(handles=[lines_legend, interactions_legend]) + plt.ticklabel_format(axis='x', useOffset=False) + plt.xticks(x_axis, df['revision']) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + def __init__(self, plot_config: PlotConfig, **kwargs: tp.Any) -> None: + 
super().__init__(plot_config, **kwargs) + + +class ProjectEvolutionPlotGenerator( + PlotGenerator, + generator_name="project-evolution", + options=[REQUIRE_CASE_STUDY] +): + + def generate(self) -> tp.List['Plot']: + return [ProjectEvolutionPlot(self.plot_config, **self.plot_kwargs)] diff --git a/varats/varats/plots/revision_impact.py b/varats/varats/plots/revision_impact.py new file mode 100644 index 000000000..1071bb2e4 --- /dev/null +++ b/varats/varats/plots/revision_impact.py @@ -0,0 +1,207 @@ +"""Plots for the impact of revisions.""" +import typing as tp + +import pandas as pd +import seaborn as sns +from matplotlib import pyplot as plt + +from varats.data.metrics import apply_tukeys_fence +from varats.mapping.author_map import generate_author_map +from varats.mapping.commit_map import get_commit_map, CommitMap +from varats.paper.case_study import CaseStudy +from varats.plot.plot import Plot +from varats.plot.plots import PlotConfig, PlotGenerator +from varats.plots.commit_trend import interactions_and_lines_per_commit_wrapper +from varats.plots.scatter_plot_utils import multivariate_grid +from varats.plots.surviving_commits import get_lines_per_commit_long +from varats.project.project_util import get_primary_project_source +from varats.ts_utils.cli_util import make_cli_option +from varats.ts_utils.click_param_types import REQUIRE_MULTI_CASE_STUDY +from varats.utils.git_util import ( + ShortCommitHash, + FullCommitHash, + create_commit_lookup_helper, + CommitRepoPair, +) + + +def revision_impact(case_study: CaseStudy) -> pd.DataFrame: + """Returns a dataframe with the impact of each revision of a single + case_study.""" + interaction_data = interactions_and_lines_per_commit_wrapper( + case_study, False + ) + interaction_data["interactions_diff"] = \ + interaction_data.groupby("base_hash", sort=False)[ + "interactions"].diff().astype(float) + interaction_data["lines_diff"] = \ + interaction_data.groupby("base_hash", sort=False)[ + "lines"].diff().astype(float) 
+ interaction_data.drop(columns=["base_hash"], inplace=True) + interaction_data["interactions_diff"] = interaction_data["interactions_diff" + ].abs() + interaction_data["lines_diff"] = interaction_data["lines_diff"].abs() + data = interaction_data.groupby("revision").agg({ + "interactions": "sum", + "interactions_diff": "sum", + "lines_diff": "sum", + "lines": "sum" + }) + data["interaction_change" + ] = data["interactions_diff"] / data["interactions"] + data["line_change"] = data["lines_diff"] / data["lines"] + impacted = pd.NamedAgg( + column="interactions_diff", + aggfunc=lambda column: column[column > 1].count() / column.count() + ) + data["impacted_commits"] = interaction_data.groupby("revision").agg( + impacted_commits=impacted + )["impacted_commits"] + return data.reset_index().astype(float) + + +def impact_data(case_studys: tp.List[CaseStudy]) -> pd.DataFrame: + """Returns a dataframe with the impact of each revision of a list of + case_studies.""" + data = pd.DataFrame({ + "revision": [], + "interactions": [], + "interactions_diff": [], + "interaction_change": [], + "lines": [], + "lines_diff": [], + "line_change": [], + "impacted_commits": [], + "project": [], + }) + for case_study in case_studys: + cs_data = revision_impact(case_study) + cs_data.insert(1, "project", case_study.project_name) + data = pd.concat([data, cs_data], + ignore_index=True, + copy=False, + join="inner") + return data + + +def calc_missing_revisions_impact( + case_studys: tp.List[CaseStudy], boundary_gradient: float +) -> tp.Set[FullCommitHash]: + commit_map: CommitMap = get_commit_map(case_studys[0].project_name) + + def head_cm_neighbours(lhs_cm: int, rhs_cm: int) -> bool: + return lhs_cm + 1 == rhs_cm + + new_revs: tp.Set[FullCommitHash] = set() + + data = impact_data(case_studys) + data.fillna(value=0, inplace=True) + df_iter = data.iterrows() + _, last_row = next(df_iter) + for _, row in df_iter: + change = row["impacted_commits"] + if change > (boundary_gradient): + lhs_cm = 
last_row["revision"] + rhs_cm = row["revision"] + if head_cm_neighbours(lhs_cm, rhs_cm): + print( + "Found steep gradient between neighbours " + + f"{lhs_cm} - {rhs_cm}: {round(change, 5)}" + ) + else: + print( + "Unusual gradient between " + + f"{lhs_cm} - {rhs_cm}: {round(change, 5)}" + ) + new_rev_id = round((lhs_cm + rhs_cm) / 2.0) + new_rev = commit_map.c_hash(new_rev_id) + print(f"-> Adding {new_rev} as new revision to the sample set") + new_revs.add(new_rev) + print() + last_row = row + return new_revs + + +class RevisionImpactScatterLines(Plot, plot_name="revision_impact_lines"): + """Plots the impact of each revision compared to its line change of a list + of case_studies.""" + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + return calc_missing_revisions_impact( + self.plot_kwargs["case_study"], boundary_gradient + ) + + def plot(self, view_mode: bool) -> None: + case_studys: tp.List[CaseStudy] = self.plot_kwargs["case_study"] + data = self.plot_kwargs["data"] + data.fillna(value=0, inplace=True) + if len(case_studys) == 1: + data = data.loc[data["project"] == case_studys[0].project_name] + cmap = get_commit_map(case_studys[0].project_name) + commit_helper = create_commit_lookup_helper( + case_studys[0].project_name + ) + repo = get_primary_project_source(case_studys[0].project_name).local + amap = generate_author_map(case_studys[0].project_name) + data["project"] = data["revision"].apply( + lambda x: amap.get_author_by_name( + commit_helper(CommitRepoPair(cmap.c_hash(x), repo)).author. 
+ name + ) + ) + data = apply_tukeys_fence(data, "line_change", 3) + data["project"] = data["project"].apply(lambda x: f"\\textsc{{{x}}}") + + plt.rcParams.update({"text.usetex": True, "font.family": "Helvetica"}) + with sns.color_palette("husl", 12): + grid = multivariate_grid( + data, + "impacted_commits", + "line_change", + "project", + ) + grid.set_axis_labels( + "Impact", + "RelativeChurn", + fontsize=self.plot_config.font_size() + ) + plt.gcf().set_size_inches(10, 5) + ymax = data["line_change"].max() + plt.ylim(-0.001, ymax + 0.01) + + +class RevisionImpactGenerator( + PlotGenerator, + generator_name="revision-impact", + options=[ + REQUIRE_MULTI_CASE_STUDY, + make_cli_option( + "--individual", + help="Generate additional plots for each case study.", + type=bool, + default=False, + is_flag=True + ) + ] +): + """Generates a plot that shows the impact of each revision of a list of + case_studies.""" + + def generate(self) -> tp.List['varats.plot.plot.Plot']: + case_studys: tp.List[CaseStudy] = self.plot_kwargs["case_study"] + plots: tp.List[Plot] = [] + self.plot_kwargs["data"] = impact_data(case_studys) + if self.plot_kwargs["individual"]: + for case_study in case_studys: + kwargs = self.plot_kwargs.copy() + kwargs["case_study"] = [case_study] + plots.append( + RevisionImpactScatterLines(self.plot_config, **kwargs) + ) + + plots.append( + RevisionImpactScatterLines(self.plot_config, **self.plot_kwargs) + ) + return plots diff --git a/varats/varats/plots/scatter_plot_utils.py b/varats/varats/plots/scatter_plot_utils.py index 3510d3da4..b44a62ba8 100644 --- a/varats/varats/plots/scatter_plot_utils.py +++ b/varats/varats/plots/scatter_plot_utils.py @@ -13,6 +13,7 @@ def multivariate_grid( y: str, hue: str, global_kde: bool = True, + individual_kde: bool = True, **kwargs: tp.Any ) -> sns.JointGrid: """ @@ -55,20 +56,21 @@ def multivariate_grid( ax.xaxis.label.set_size(25) ax.yaxis.label.set_size(25) ax.tick_params(labelsize=15) - sns.kdeplot( - data=df_group, - 
x=x, - ax=grid.ax_marg_x, - fill=True, - warn_singular=False - ) - sns.kdeplot( - data=df_group, - y=y, - ax=grid.ax_marg_y, - fill=True, - warn_singular=False - ) + if individual_kde: + sns.kdeplot( + data=df_group, + x=x, + ax=grid.ax_marg_x, + fill=True, + warn_singular=False + ) + sns.kdeplot( + data=df_group, + y=y, + ax=grid.ax_marg_y, + fill=True, + warn_singular=False + ) if global_kde: sns.kdeplot( data=data, diff --git a/varats/varats/plots/surviving_commits.py b/varats/varats/plots/surviving_commits.py new file mode 100644 index 000000000..d56dd577b --- /dev/null +++ b/varats/varats/plots/surviving_commits.py @@ -0,0 +1,504 @@ +"""Plots for the analysis of loc and interactions over time.""" +import math +import typing as tp + +import click +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt +import numpy as np +import seaborn as sns +from matplotlib import style +from pandas import DataFrame +from pygtrie import CharTrie + +from varats.data.databases.commit_interaction_aggregate_database import ( + SurvivingInteractionsDatabase, +) +from varats.data.databases.survivng_lines_database import SurvivingLinesDatabase +from varats.mapping.commit_map import get_commit_map, CommitMap +from varats.paper.case_study import CaseStudy +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator, PlotConfig +from varats.project.project_util import get_primary_project_source +from varats.ts_utils.click_param_types import REQUIRE_CASE_STUDY +from varats.utils.git_util import ( + ShortCommitHash, + FullCommitHash, + create_commit_lookup_helper, + CommitRepoPair, +) + + +def get_lines_per_commit_long( + case_study: CaseStudy, filter_cs=True +) -> DataFrame: + """Get a data frame with the surviving lines of each commit at the revisions + of a case study in the format Revision | Commit | Lines.""" + project_name = case_study.project_name + data = SurvivingLinesDatabase.get_data_for_project( + project_name, ["revision", "commit_hash", 
"lines"], + get_commit_map(project_name), case_study + ) + data.rename(columns={'commit_hash': 'base_hash'}, inplace=True) + + def cs_filter(data_frame: DataFrame) -> DataFrame: + """Filter out all commits that are not in the case study if one was + selected.""" + if case_study is None or data_frame.empty: + return data_frame + # use a trie for fast prefix lookup + revisions = CharTrie() + for revision in case_study.revisions: + revisions[revision.hash] = True + return data_frame[ + data_frame["base_hash"].apply(lambda x: revisions.has_node(x) != 0)] + + return cs_filter(data) if filter_cs else data + + +def get_normalized_lines_per_commit_long( + case_study: CaseStudy, filter_cs=True +) -> DataFrame: + """Get a data frame with the surviving lines of each commit at the revisions + of a case study normalized by the maximum lines of the commit in the format + Revision | Commit | Lines.""" + data = get_lines_per_commit_long(case_study, filter_cs) + max_lines = data.drop(columns=["revision"]).groupby("base_hash").max() + data = data.apply( + lambda x: [ + x['revision'], x['base_hash'], + (x['lines'] * 100 / max_lines['lines'][x['base_hash']]) + ], + axis=1, + result_type='broadcast' + ) + + return data + + +def get_normalized_lines_per_commit_wide(case_study: CaseStudy) -> DataFrame: + """ + Get a data frame with the surviving lines of each commit at the revisions of + a case study normalized by the maximum lines of the commit. + + each commit is a row , the revisions are the columns. 
+ """ + case_study_data = get_normalized_lines_per_commit_long(case_study) + case_study_data = case_study_data.pivot( + index="base_hash", columns='revision', values='lines' + ) + cmap = get_commit_map(case_study.project_name) + case_study_data.sort_index( + key=lambda x: x.map(lambda y: cmap.short_time_id(ShortCommitHash(y))), + inplace=True + ) + case_study_data.sort_index( + axis=1, key=lambda x: x.map(cmap.short_time_id), inplace=True + ) + + return case_study_data.astype(float) + + +def get_interactions_per_commit_long(case_study: CaseStudy, filter_cs=True): + """Get a data frame with the surviving interactions of each commit at the + revisions of a case study in the format Revision | Commit | Interactions.""" + project_name = case_study.project_name + data = SurvivingInteractionsDatabase.get_data_for_project( + project_name, ["revision", "base_hash", "interactions"], + get_commit_map(project_name), case_study + ) + + def cs_filter(data_frame: DataFrame) -> DataFrame: + """Filter out all commits that are not in the case study if one was + selected.""" + if case_study is None or data_frame.empty: + return data_frame + # use a trie for fast prefix lookup + revisions = CharTrie() + for revision in case_study.revisions: + revisions[revision.hash] = True + return data_frame[ + data_frame["base_hash"].apply(lambda x: revisions.has_node(x) != 0)] + + return cs_filter(data) if filter_cs else data + + +def get_normalized_interactions_per_commit_long( + case_study: CaseStudy, filter_cs=True +) -> DataFrame: + """Get a data frame with the surviving interactions of each commit at the + revisions of a case study normalized by the maximum interactions of the + commit in the format Revision | Commit | Interactions.""" + data = get_interactions_per_commit_long(case_study, filter_cs) + max_interactions = data.drop(columns=["revision"] + ).groupby("base_hash").max() + data = data.apply( + lambda x: [ + x['base_hash'], x['revision'], + ( + x['interactions'] * 100 / 
max_interactions['interactions'][x[ + 'base_hash']] + ) if max_interactions['interactions'][x['base_hash']] is not math. + nan else math.nan + ], + axis=1, + result_type='broadcast' + ) + + return data + + +def get_normalized_interactions_per_commit_wide( + case_study: CaseStudy +) -> DataFrame: + """ + Get a data frame with the surviving interactions of each commit at the + revisions of a case study normalized by the maximum interactions of the + commit. + + each commit is a row , the revisions are the columns. + """ + data = get_normalized_interactions_per_commit_long(case_study) + data = data.pivot( + index="base_hash", columns="revision", values="interactions" + ) + cmap = get_commit_map(case_study.project_name) + data.sort_index( + key=lambda x: x.map(lambda y: cmap.short_time_id(ShortCommitHash(y))), + inplace=True + ) + data.sort_index( + axis=1, key=lambda x: x.map(cmap.short_time_id), inplace=True + ) + return data.astype(float) + + +def lines_and_interactions(case_study: CaseStudy) -> DataFrame: + """ + Get a data frame with the surviving lines and interactions of each commit at + the revisions of a case study each commit is a row , the revisions are the + columns, lines and interactions are sublevels. + + An additionla sublevel called space is added for better readability when + plotted. 
+ """ + lines: DataFrame = get_normalized_lines_per_commit_long(case_study) + + interactions: DataFrame = get_normalized_interactions_per_commit_long( + case_study + ) + data = lines.merge(interactions, how='left', on=["base_hash", "revision"]) + data.dropna( + axis=0, how='any', inplace=True, subset=["lines", "interactions"] + ) + data.insert(3, "space", np.nan) + data = data.pivot( + index="base_hash", + columns="revision", + values=["lines", "interactions", 'space'] + ) + data = data.stack(level=0, dropna=False) + cmap = get_commit_map(case_study.project_name) + data.sort_index( + level=0, + key=lambda x: x.map(lambda y: cmap.short_time_id(ShortCommitHash(y))), + inplace=True + ) + data.sort_index( + axis=1, key=lambda x: x.map(cmap.short_time_id), inplace=True + ) + return data.astype(float) + + +def get_author_color_map(data, case_study) -> dict[tp.Any, tp.Any]: + """Generate a color map for authors to collor commits based on theyr + authors.""" + commit_lookup_helper = create_commit_lookup_helper(case_study.project_name) + author_set: set = set() + for commit_hash in data.index.get_level_values(0): + repo = get_primary_project_source(case_study.project_name).local + commit = commit_lookup_helper( + CommitRepoPair(FullCommitHash(commit_hash), repo) + ) + author_set.add(commit.author.name) + author_list = list(author_set) + colormap = plt.get_cmap("nipy_spectral") + colors = colormap(np.linspace(0, 1, len(author_list))) + return dict(zip(author_list, colors)) + + +class SingleCommitPlot(Plot, plot_name="single_commit_survival"): + """Plot for the evolution of a single commit.""" + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + def __init__( + self, plot_config: PlotConfig, revision: str, **kwargs: tp.Any + ): + self._revision = revision + super().__init__(plot_config, **kwargs) + + @property + def name(self) -> str: + return "single_commit_survival_" + ShortCommitHash(self._revision).hash + + def 
plot(self, view_mode: bool) -> None: + """Plot the evolution of a single commit.""" + _, axis = plt.subplots(1, 1) + plt.rcParams.update({"text.usetex": True, "font.family": "Helvetica"}) + case_study = self.plot_kwargs['case_study'] + lines: DataFrame = get_lines_per_commit_long(case_study, False) + + interactions: DataFrame = get_interactions_per_commit_long( + case_study, False + ) + data = lines.merge( + interactions, how='left', on=["base_hash", "revision"] + ) + data.dropna( + axis=0, how='any', inplace=True, subset=["lines", "interactions"] + ) + + data = data[ + data["base_hash"].apply(lambda x: x.startswith(self._revision))] + data.set_index("revision", inplace=True) + cmap = get_commit_map(case_study.project_name) + data.sort_index( + axis=0, key=lambda x: x.map(cmap.short_time_id), inplace=True + ) + data.drop(columns="base_hash") + _, axis = plt.subplots(1, 1) + plt.setp( + axis.get_xticklabels(), + fontsize=self.plot_config.x_tick_size(), + family='monospace', + ) + ax = axis.twinx() + x_axis = range(len(data)) + ax.scatter(x_axis, data['lines'], color="green") + ax.set_ylabel("Lines", color="g", fontsize=14) + axis.scatter(x_axis, data['interactions'], color="orange") + axis.set_ylabel("Interactions", color="orange", fontsize=14) + axis.tick_params(axis="y", labelsize=14) + ax.set_ylim(ymin=0) + axis.set_ylim(ymin=0) + axis.set_xlabel("Revisions", fontsize=14) + axis.set_xticklabels([]) + plt.xticks(fontsize=14) + plt.yticks(fontsize=14) + + +class HeatMapPlot(Plot, plot_name=None): + """ + Base Heatmap plot for to plot wide data frames. + + Subclases need to provide data functions to get the dataframes. 
+ """ + colormap = 'RdYlGn' + vmin = 0 + vmax = 100 + xticklabels = 1 + yticklabels = 1 + XLABEL = "Sampled revisions" + y_label = None + + def __init__( + self, plot_config: PlotConfig, + data_function: tp.Callable[[CaseStudy], DataFrame], **kwargs + ): + super().__init__(plot_config, **kwargs) + self.color_commits = False + self.data_function = data_function + + def plot(self, view_mode: bool) -> None: + """Plot the heatmap.""" + style.use(self.plot_config.get_dict()) + _, axis = plt.subplots(1, 1) + case_study = self.plot_kwargs['case_study'] + data = self.data_function(case_study) + axis.set_title(case_study.project_name.capitalize()) + axis = sns.heatmap( + data, + cmap=self.colormap, + vmin=self.vmin, + vmax=self.vmax, + xticklabels=self.xticklabels, + yticklabels=self.yticklabels, + linewidth=0.1, + linecolor="grey" + ) + if self.XLABEL: + axis.set_xlabel(self.XLABEL) + if self.y_label: + axis.set_ylabel(self.y_label) + if self.color_commits: + color_map = get_author_color_map(data, case_study) + commit_lookup_helper = create_commit_lookup_helper( + case_study.project_name + ) + repo = get_primary_project_source(case_study.project_name).local + for label in axis.get_yticklabels(): + commit = commit_lookup_helper( + CommitRepoPair(FullCommitHash(label.get_text()), repo) + ) + label.set( + color=color_map[commit.author.name], + text=label.get_text()[:ShortCommitHash.hash_length()] + " █" + ) + legend = [] + for author, color in color_map.items(): + legend.append(mpatches.Patch(color=color, label=author)) + plt.legend( + fontsize=8, + handles=legend, + bbox_to_anchor=(1.2, 0.5), + loc=2, + borderaxespad=0. 
+ ) + plt.setp( + axis.get_xticklabels(), + fontsize=self.plot_config.x_tick_size() - 1, + family='monospace', + ) + plt.setp( + axis.get_yticklabels(), + fontsize=self.plot_config.x_tick_size(), + family='monospace' + ) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + """Calculate.""" + commit_map: CommitMap = get_commit_map( + self.plot_kwargs['case_study'].project_name + ) + + def head_cm_neighbours( + lhs_cm: ShortCommitHash, rhs_cm: ShortCommitHash + ) -> bool: + return commit_map.short_time_id( + lhs_cm + ) + 1 == commit_map.short_time_id(rhs_cm) + + new_revs: tp.Set[FullCommitHash] = set() + + data = self.data_function(self.plot_kwargs['case_study']) + data.fillna(value=0, inplace=True) + df_iter = data.items() + last_revision, last_column = next(df_iter) + for revision, column in df_iter: + gradient = abs(column - last_column) + if any(gradient > (boundary_gradient * 100)): + lhs_cm = last_revision + rhs_cm = revision + if head_cm_neighbours(lhs_cm, rhs_cm): + print( + "Found steep gradient between neighbours " + + f"{lhs_cm} - {rhs_cm}: {round(max(gradient), 5)}" + ) + else: + print( + "Unusual gradient between " + + f"{lhs_cm} - {rhs_cm}: {round(max(gradient), 5)}" + ) + new_rev_id = round(( + commit_map.short_time_id(lhs_cm) + + commit_map.short_time_id(rhs_cm) + ) / 2.0) + new_rev = commit_map.c_hash(new_rev_id) + print( + f"-> Adding {new_rev} as new revision to the sample set" + ) + new_revs.add(new_rev) + print() + last_revision = revision + last_column = column + return new_revs + + +class SurvivingInteractionsPlot( + HeatMapPlot, plot_name="surviving_interactions_plot" +): + """Plot the normalized evolution of commit interactions.""" + NAME = 'surviving_interactions_plot' + YLABEL = "Surviving Interactions" + + def __init__(self, plot_config: PlotConfig, **kwargs: tp.Any): + super().__init__( + plot_config, get_normalized_interactions_per_commit_wide, **kwargs + ) + self.color_commits = True + + 
+class SurvivingLinesPlot(HeatMapPlot, plot_name="surviving_commit_plot"): + """Plot the normalized evolution of LOC.""" + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + NAME = 'surviving_lines_plot' + y_label = "Surviving Lines" + + def __init__(self, plot_config: PlotConfig, **kwargs: tp.Any): + super().__init__( + plot_config, get_normalized_lines_per_commit_wide, **kwargs + ) + self.color_commits = True + + +class CompareSurvivalPlot(HeatMapPlot, plot_name="compare_survival"): + """Plot the normalized evolution of commit interactions and LOC.""" + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + pass + + NAME = 'compare_survival' + + y_label = "Commit Interactions vs. Lines" + + def __init__(self, plot_config: PlotConfig, **kwargs: tp.Any): + super().__init__(plot_config, lines_and_interactions, **kwargs) + self.yticklabels = 3 + self.color_commits = True + + +class SingleCommitSurvivalPlotGenerator( + PlotGenerator, + generator_name="single-survival", + options=[ + REQUIRE_CASE_STUDY, + click.argument("revisions", nargs=-1, type=str) + ] +): + """Generator for the Plot of the evolution of a single commit.""" + + def generate(self) -> tp.List['Plot']: + + return [ + SingleCommitPlot( + self.plot_config, revision=revision, **self.plot_kwargs + ) for revision in self.plot_kwargs['revisions'] + ] + + +class SurvivingCommitPlotGenerator( + PlotGenerator, + generator_name="commit-survival", + options=[REQUIRE_CASE_STUDY] +): + """Generator for the Plot of the evolution of all commits.""" + + def generate(self) -> tp.List['Plot']: + return [ + SurvivingInteractionsPlot(self.plot_config, **self.plot_kwargs), + SurvivingLinesPlot(self.plot_config, **self.plot_kwargs), + CompareSurvivalPlot(self.plot_config, **self.plot_kwargs) + ] diff --git a/varats/varats/projects/c_projects/bzip2.py b/varats/varats/projects/c_projects/bzip2.py index fa9de73d7..4de42d5bf 100644 
--- a/varats/varats/projects/c_projects/bzip2.py
+++ b/varats/varats/projects/c_projects/bzip2.py
@@ -106,7 +106,6 @@ def binaries_for_revision(
         revision: ShortCommitHash
     ) -> tp.List[ProjectBinaryWrapper]:
         binary_map = RevisionBinaryMap(get_local_project_git_path(Bzip2.NAME))
-
         binary_map.specify_binary(
             'build/bzip2',
             BinaryType.EXECUTABLE,
diff --git a/varats/varats/projects/c_projects/sqlite.py b/varats/varats/projects/c_projects/sqlite.py
new file mode 100644
index 000000000..d68799c01
--- /dev/null
+++ b/varats/varats/projects/c_projects/sqlite.py
@@ -0,0 +1,73 @@
+"""Project file for SQLite."""
+import typing as tp
+
+import benchbuild as bb
+from benchbuild.utils.cmd import make
+from benchbuild.utils.settings import get_number_of_jobs
+from plumbum import local
+
+from varats.containers.containers import get_base_image, ImageBase
+from varats.paper.paper_config import PaperConfigSpecificGit
+from varats.project.project_domain import ProjectDomains
+from varats.project.project_util import (
+    ProjectBinaryWrapper,
+    BinaryType,
+    get_local_project_git_path,
+    verify_binaries,
+)
+from varats.project.varats_project import VProject
+from varats.utils.git_util import ShortCommitHash, RevisionBinaryMap
+from varats.utils.settings import bb_cfg
+
+
+class SqLite(VProject):
+    """SQLite is a C-language library that implements a small, fast, self-
+    contained, high-reliability, full-featured, SQL database engine."""
+
+    NAME = 'sqlite'
+    GROUP = 'c_projects'
+    DOMAIN = ProjectDomains.DATABASE
+
+    SOURCE = [
+        PaperConfigSpecificGit(
+            project_name="sqlite",
+            remote="https://github.com/sqlite/sqlite.git",
+            local="sqlite",
+            refspec="origin/HEAD",
+            limit=None,
+            shallow=False
+        )
+    ]
+
+    CONTAINER = get_base_image(
+        ImageBase.DEBIAN_10
+    ).run('apt', 'install', '-y', 'libtool', 'autoconf')  # autotools build deps
+
+    @staticmethod
+    def binaries_for_revision(
+        revision: ShortCommitHash
+    ) -> tp.List[ProjectBinaryWrapper]:
+        binary_map = RevisionBinaryMap(get_local_project_git_path(SqLite.NAME))
+
+        binary_map.specify_binary('build/sqlite3', BinaryType.EXECUTABLE)
+
+        return binary_map[revision]
+
+    def run_tests(self) -> None:
+        pass  # NOTE(review): no tests executed for sqlite — confirm intended
+
+    def compile(self) -> None:
+        """Compile the project."""
+        sqlite_source = local.path(self.source_of(self.primary_source))
+
+        c_compiler = bb.compiler.cc(self)
+        cxx_compiler = bb.compiler.cxx(self)
+        build_dir = sqlite_source / "build"
+        with local.cwd(build_dir):
+            with local.env(CC=str(c_compiler), CXX=str(cxx_compiler)):
+                bb.watch(local["../configure"])()  # out-of-tree configure, run from build/
+
+            bb.watch(make)("-j", get_number_of_jobs(bb_cfg()))
+
+        with local.cwd(sqlite_source):
+            verify_binaries(self)
diff --git a/varats/varats/tables/case_study_metrics_table.py b/varats/varats/tables/case_study_metrics_table.py
index a292da3f3..1e4f3a9dc 100644
--- a/varats/varats/tables/case_study_metrics_table.py
+++ b/varats/varats/tables/case_study_metrics_table.py
@@ -4,20 +4,35 @@
 
 import pandas as pd
 
+from varats.data.databases.file_status_database import FileStatusDatabase
+from varats.mapping.author_map import generate_author_map
 from varats.mapping.commit_map import get_commit_map
 from varats.paper.paper_config import get_loaded_paper_config
 from varats.project.project_util import (
     get_project_cls_by_name,
     get_local_project_git_path,
 )
+from varats.report.report import FileStatusExtension
 from varats.table.table import Table
 from varats.table.table_utils import dataframe_to_table
 from varats.table.tables import TableFormat, TableGenerator
+from varats.ts_utils.artefact_util import (
+    ReportTypeConverter,
+    ExperimentTypeConverter,
+)
+from varats.ts_utils.cli_util import convert_value, make_cli_option
+from varats.ts_utils.click_param_types import (
+    create_report_type_choice,
+    create_experiment_type_choice,
+    REQUIRE_EXPERIMENT_TYPE,
+)
 from varats.utils.git_util import (
     calc_project_loc,
     num_project_commits,
     num_project_authors,
     calc_repo_loc,
+    num_commits,
+    num_authors,
 )
 
 LOG = 
logging.Logger(__name__) @@ -64,7 +79,20 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: } if revision: cs_dict[project_name]["Revision"] = revision.short_hash - + if self.table_kwargs["experiment_type"]: + cmap = get_commit_map(project_name) + file_data = FileStatusDatabase.get_data_for_project( + project_name, ["revision", "file_status"], + cmap, + case_study, + experiment_type=self.table_kwargs["experiment_type"], + tag_blocked=False + ) + cs_dict[project_name]["Analysed Revisions"] = len( + file_data[file_data["file_status"] == + FileStatusExtension.SUCCESS.get_status_extension() + ] + ) cs_data.append(pd.DataFrame.from_dict(cs_dict, orient="index")) df = pd.concat(cs_data).sort_index() @@ -79,7 +107,17 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str: class CaseStudyMetricsTableGenerator( - TableGenerator, generator_name="cs-metrics-table", options=[] + TableGenerator, + generator_name="cs-metrics-table", + options=[ + convert_value("experiment_type", ExperimentTypeConverter)( + make_cli_option( + "--experiment-type", + type=create_experiment_type_choice(), + help="The experiment type to use." 
+ ) + ) + ] ): """Generates a cs-metrics table for the selected case study(ies).""" diff --git a/varats/varats/ts_utils/click_param_types.py b/varats/varats/ts_utils/click_param_types.py index f080fa4e7..450446193 100644 --- a/varats/varats/ts_utils/click_param_types.py +++ b/varats/varats/ts_utils/click_param_types.py @@ -144,7 +144,7 @@ def create_report_type_choice() -> TypedChoice[tp.Type[BaseReport]]: return TypedChoice(BaseReport.REPORT_TYPES) -def __is_experiment_excluded(experiment_name: str) -> bool: +def is_experiment_excluded(experiment_name: str) -> bool: """Checks if an experiment should be excluded, as we don't want to show/use standard BB experiments.""" if experiment_name in ('raw', 'empty', 'no-measurement'): @@ -160,7 +160,7 @@ def create_experiment_type_choice( return TypedChoice({ k: v for k, v in ExperimentRegistry.experiments.items() - if not __is_experiment_excluded(k) + if not is_experiment_excluded(k) }) @@ -176,7 +176,7 @@ def create_multi_experiment_type_choice( value_dict = { k: [v] # make value a list to be consistent with entry for 'all' for k, v in ExperimentRegistry.experiments.items() - if not __is_experiment_excluded(k) + if not is_experiment_excluded(k) } value_dict["all"] = [ experiment for value in value_dict.values() for experiment in value