From 6a4177494fe1a4dce07f77ceebd1ae0f6181d0be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 18 Mar 2024 16:55:44 +0100 Subject: [PATCH 1/5] Adds tool for creating feature source annotations --- requirements.txt | 2 +- varats/setup.py | 3 +- varats/varats/tools/driver_feature.py | 239 ++++++++++++++++++++ varats/varats/ts_utils/click_param_types.py | 8 + 4 files changed, 250 insertions(+), 2 deletions(-) create mode 100644 varats/varats/tools/driver_feature.py diff --git a/requirements.txt b/requirements.txt index 7bbc7f53e..6093674bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ plotly>=5.13.1 plumbum>=1.6 pre-commit>=3.2.0 PyDriller>=2.4.1 -pygit2>=1.10 +pygit2>=1.14 PyGithub>=1.58 pygraphviz>=1.7 pygtrie>=2.3 diff --git a/varats/setup.py b/varats/setup.py index 1b4879185..ab11d6296 100644 --- a/varats/setup.py +++ b/varats/setup.py @@ -30,7 +30,7 @@ "pandas>=1.5.3", "plotly>=5.13.1", "plumbum>=1.6", - "pygit2>=1.10,<1.14.0", + "pygit2>=1.14", "PyGithub>=1.47", "pygraphviz>=1.7", "pygtrie>=2.3", @@ -65,6 +65,7 @@ 'vara-cs-gui = varats.tools.driver_casestudy_gui:main', 'vara-develop = varats.tools.driver_develop:main', 'vd = varats.tools.driver_develop:main', + 'vara-feature = varats.tools.driver_feature:main', 'vara-gen-bbconfig = ' 'varats.tools.driver_gen_benchbuild_config:main', 'vara-pc = varats.tools.driver_paper_config:main', diff --git a/varats/varats/tools/driver_feature.py b/varats/varats/tools/driver_feature.py new file mode 100644 index 000000000..55cd4140f --- /dev/null +++ b/varats/varats/tools/driver_feature.py @@ -0,0 +1,239 @@ +"""Driver module for `vara-feature`""" +import logging +import re +import textwrap +import typing as tp +from functools import partial + +import click +from pygit2 import Walker, Commit, Blob +from pygit2.enums import SortMode + +from varats.project.project_util import get_local_project_git +from varats.tools.tool_util import configuration_lookup_error_handler +from varats.ts_utils.cli_util import initialize_cli_tool +from varats.ts_utils.click_param_types import create_project_choice +from varats.utils.git_util import FullCommitHash + +LOG = logging.getLogger(__name__) + + +class Location: + LOCATION_FORMAT = re.compile( + r"(?P[\w.]+)\s" + r"(?P\d+):(?P\d+)\s" + r"(?P\d+):(?P\d+)" + ) + + def __init__( + self, file: str, start_line: int, start_col: int, end_line: int, + end_col: int + ) -> None: + self.file = file + self.start_line = start_line + self.start_col = start_col + self.end_line = end_line + self.end_col = end_col + + @staticmethod + def parse_string( + s: str, + old_location: tp.Optional["Location"] = None + ) -> tp.Optional["Location"]: + if old_location is not None and s.isnumeric(): + new_line = int(s) + return Location( + old_location.file, new_line, old_location.start_col, new_line, + old_location.end_col + ) + + match = Location.LOCATION_FORMAT.match(s) + if match is None: + raise click.UsageError( + f"Could not parse location: {s}.\nLocation format is " + f"' : :'" + ) + + return Location( + match.group("file"), int(match.group("start_line")), + int(match.group("start_col")), int(match.group("end_line")), + int(match.group("end_col")) + ) + + def to_xml(self) -> str: + xml = f"{self.file}\n" + xml += f"{self.start_line}{self.start_col}\n" + xml += f"{self.end_line}{self.end_col}\n" + return xml + + def __str__(self) -> str: + return f"{self.file} {self.start_line}:{self.start_col} {self.end_line}:{self.end_col}" + + +class FeatureAnnotation: + + def __init__( + self, + feature_name: str, + location: Location, + introduced: FullCommitHash, + removed: tp.Optional[FullCommitHash] = None + ) -> None: + self.feature_name = feature_name + self.location = location + self.introduced = introduced + self.removed = removed + + def to_xml(self) -> str: + xml = "\n" + xml += " \n" + xml += f" {self.introduced.hash}\n" + if self.removed is not None: + xml += f" {self.removed.hash}\n" + xml += " \n" + xml += textwrap.indent(self.location.to_xml(), " ") + xml += "" + + return xml + + +def prompt_location( + feature_name: str, + commit_hash: FullCommitHash, + old_location: tp.Optional[Location] = None +) -> tp.Optional[Location]: + parse_location: tp.Callable[[str], tp.Optional[Location]] + if old_location is not None: + parse_location = partial( + Location.parse_string, old_location=old_location + ) + else: + parse_location = Location.parse_string + + return click.prompt( + f"Enter location for feature {feature_name} @ {commit_hash.short_hash}", + value_proc=parse_location + ) + + +def get_location_content(commit: Commit, + location: Location) -> tp.Optional[str]: + assert location.start_line == location.end_line, \ + "Multiline locations are not supported yet." + lines = tp.cast(Blob, commit.tree[location.file]).data.splitlines() + + if len(lines) < location.start_line: + return None + + line = lines[location.start_line - 1].decode("utf-8") + + if len(line) <= location.end_col: + return None + + return line[(location.start_col - 1):location.end_col] + + +@click.group() +@configuration_lookup_error_handler +def main() -> None: + """Tool for working with feature models.""" + initialize_cli_tool() + + +@main.command("annotate") +@click.option("--project", "-p", type=create_project_choice(), required=True) +@click.option("--revision", "-r", type=str, required=False) +@click.option( + "--outfile", + "-o", + type=click.File("w"), + default=click.open_file('-', mode="w"), + required=False +) +def __annotate( + project: str, revision: tp.Optional[str], outfile: tp.IO +) -> None: + initialize_cli_tool() + + repo = get_local_project_git(project) + walker: Walker + + walker = repo.walk( + repo.head.target, SortMode.TOPOLOGICAL | SortMode.REVERSE + ) + walker.simplify_first_parent() + + first_commit = next(walker) + if revision is not None: + commit = repo.get(revision) + while first_commit != commit: + first_commit = next(walker) + + tracked_features: dict[str, list[FeatureAnnotation]] = {} + last_annotations: dict[str, FeatureAnnotation] = {} + last_annotation_targets: dict[str, str] = {} + + click.echo(f"Current revision: {first_commit.oid}") + while click.confirm("Annotate another feature?"): + feature_name = click.prompt("Enter feature name to annotate", type=str) + commit_hash = FullCommitHash(str(first_commit.id)) + location = prompt_location(feature_name, commit_hash) + last_annotations[feature_name] = FeatureAnnotation( + feature_name, location, commit_hash + ) + last_annotation_targets[feature_name] = get_location_content( + first_commit, location + ) + tracked_features[feature_name] = [] + LOG.debug( + f"Tracking {feature_name} @ {location}: {last_annotation_targets[feature_name]}" + ) + + for commit in walker: + commit_hash = FullCommitHash(str(commit.id)) + click.echo(f"Current revision: {commit_hash.hash}") + + for feature, annotation in last_annotations.items(): + current_target = get_location_content(commit, annotation.location) + if current_target != last_annotation_targets[feature]: + LOG.debug( + f"{feature}: {current_target} != {last_annotation_targets[feature]}" + ) + # set removed field for annotation and store it + tracked_features[feature].append( + FeatureAnnotation( + annotation.feature_name, annotation.location, + annotation.introduced, commit_hash + ) + ) + + # track new feature location + click.echo(f"Location of feature {feature} has changed.") + new_location = prompt_location( + feature, commit_hash, annotation.location + ) + last_annotations[feature] = FeatureAnnotation( + feature, new_location, commit_hash + ) + last_annotation_targets[feature] = get_location_content( + commit, new_location + ) + LOG.debug( + f"Tracking {feature} @ {new_location}: {last_annotation_targets[feature]}" + ) + + # store remaining annotations + for feature, annotation in last_annotations.items(): + tracked_features[feature].append(annotation) + + click.echo(f"Final annotations written to {outfile.name}.") + for feature, annotations in tracked_features.items(): + outfile.write(f"Annotations for feature {feature}:\n") + for annotation in annotations: + outfile.write(annotation.to_xml()) + outfile.write("\n") + outfile.write("\n\n") + + +if __name__ == '__main__': + main() diff --git a/varats/varats/ts_utils/click_param_types.py b/varats/varats/ts_utils/click_param_types.py index bbd0e6042..06469a170 100644 --- a/varats/varats/ts_utils/click_param_types.py +++ b/varats/varats/ts_utils/click_param_types.py @@ -10,6 +10,8 @@ from varats.data.discover_reports import initialize_reports from varats.experiments.discover_experiments import initialize_experiments from varats.paper.paper_config import get_paper_config +from varats.project.project_util import get_loaded_vara_projects +from varats.projects.discover_projects import initialize_projects from varats.report.report import BaseReport from varats.ts_utils.artefact_util import ( CaseStudyConverter, @@ -146,6 +148,12 @@ def create_report_type_choice() -> TypedChoice[tp.Type[BaseReport]]: return TypedChoice(BaseReport.REPORT_TYPES) +def create_project_choice() -> click.Choice: + initialize_projects() + projects = [proj.NAME for proj in get_loaded_vara_projects()] + return click.Choice(projects) + + def __is_experiment_excluded(experiment_name: str) -> bool: """Checks if an experiment should be excluded, as we don't want to show/use standard BB experiments.""" From 8b9f3e0b1cc811e17ff598d21c72d373b4d366db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 18 Mar 2024 17:00:44 +0100 Subject: [PATCH 2/5] Add documentation --- varats/varats/tools/driver_feature.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/varats/varats/tools/driver_feature.py b/varats/varats/tools/driver_feature.py index 55cd4140f..5191fc77c 100644 --- a/varats/varats/tools/driver_feature.py +++ b/varats/varats/tools/driver_feature.py @@ -19,6 +19,8 @@ class Location: + """A location in a source code file.""" + LOCATION_FORMAT = re.compile( r"(?P[\w.]+)\s" r"(?P\d+):(?P\d+)\s" @@ -40,6 +42,7 @@ def parse_string( s: str, old_location: tp.Optional["Location"] = None ) -> tp.Optional["Location"]: + """Create a location from a string.""" if old_location is not None and s.isnumeric(): new_line = int(s) return Location( @@ -61,6 +64,7 @@ def parse_string( ) def to_xml(self) -> str: + """Convert the location to SPLConqueror feature model format.""" xml = f"{self.file}\n" xml += f"{self.start_line}{self.start_col}\n" xml += f"{self.end_line}{self.end_col}\n" @@ -71,6 +75,7 @@ def __str__(self) -> str: class FeatureAnnotation: + """A versioned feature source annotation.""" def __init__( self, @@ -85,6 +90,7 @@ def __init__( self.removed = removed def to_xml(self) -> str: + """Convert the annotation to SPLConqueror feature model format.""" xml = "\n" xml += " \n" xml += f" {self.introduced.hash}\n" @@ -97,7 +103,7 @@ def to_xml(self) -> str: return xml -def prompt_location( +def __prompt_location( feature_name: str, commit_hash: FullCommitHash, old_location: tp.Optional[Location] = None @@ -116,8 +122,8 @@ def prompt_location( ) -def get_location_content(commit: Commit, - location: Location) -> tp.Optional[str]: +def __get_location_content(commit: Commit, + location: Location) -> tp.Optional[str]: assert location.start_line == location.end_line, \ "Multiline locations are not supported yet." lines = tp.cast(Blob, commit.tree[location.file]).data.splitlines() @@ -177,11 +183,11 @@ def __annotate( while click.confirm("Annotate another feature?"): feature_name = click.prompt("Enter feature name to annotate", type=str) commit_hash = FullCommitHash(str(first_commit.id)) - location = prompt_location(feature_name, commit_hash) + location = __prompt_location(feature_name, commit_hash) last_annotations[feature_name] = FeatureAnnotation( feature_name, location, commit_hash ) - last_annotation_targets[feature_name] = get_location_content( + last_annotation_targets[feature_name] = __get_location_content( first_commit, location ) tracked_features[feature_name] = [] @@ -194,7 +200,7 @@ def __annotate( click.echo(f"Current revision: {commit_hash.hash}") for feature, annotation in last_annotations.items(): - current_target = get_location_content(commit, annotation.location) + current_target = __get_location_content(commit, annotation.location) if current_target != last_annotation_targets[feature]: LOG.debug( f"{feature}: {current_target} != {last_annotation_targets[feature]}" @@ -209,13 +215,13 @@ def __annotate( # track new feature location click.echo(f"Location of feature {feature} has changed.") - new_location = prompt_location( + new_location = __prompt_location( feature, commit_hash, annotation.location ) last_annotations[feature] = FeatureAnnotation( feature, new_location, commit_hash ) - last_annotation_targets[feature] = get_location_content( + last_annotation_targets[feature] = __get_location_content( commit, new_location ) LOG.debug( From d6f2e365c61f5c7e8ad5ff868cf45cb28eae6bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 18 Mar 2024 17:11:19 +0100 Subject: [PATCH 3/5] Resolve pygit2 import cycle in docs --- docs/source/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index 54392b0f2..a14263829 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -100,6 +100,7 @@ import click # isort:skip import git # isort:skip import github # isort:skip +import pygit2.branches # isort:skip import urllib3.exceptions # isort:skip # Some packages use new syntax for type checking that isn't available to us From 2958347730de4324ccc688274fb6a756718da372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 18 Mar 2024 17:23:49 +0100 Subject: [PATCH 4/5] Fix type issues --- varats/varats/tools/driver_feature.py | 30 +++++++++++++++------------ 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/varats/varats/tools/driver_feature.py b/varats/varats/tools/driver_feature.py index 5191fc77c..fc3403fa2 100644 --- a/varats/varats/tools/driver_feature.py +++ b/varats/varats/tools/driver_feature.py @@ -107,7 +107,7 @@ def __prompt_location( feature_name: str, commit_hash: FullCommitHash, old_location: tp.Optional[Location] = None -) -> tp.Optional[Location]: +) -> Location: parse_location: tp.Callable[[str], tp.Optional[Location]] if old_location is not None: parse_location = partial( @@ -116,9 +116,12 @@ def __prompt_location( else: parse_location = Location.parse_string - return click.prompt( - f"Enter location for feature {feature_name} @ {commit_hash.short_hash}", - value_proc=parse_location + return tp.cast( + Location, + click.prompt( + f"Enter location for feature {feature_name} @ {commit_hash.short_hash}", + value_proc=parse_location + ) ) @@ -126,12 +129,13 @@ def __get_location_content(commit: Commit, location: Location) -> tp.Optional[str]: assert location.start_line == location.end_line, \ "Multiline locations are not supported yet." - lines = tp.cast(Blob, commit.tree[location.file]).data.splitlines() + lines: tp.List[bytes] = tp.cast(Blob, commit.tree[location.file + ]).data.splitlines() if len(lines) < location.start_line: return None - line = lines[location.start_line - 1].decode("utf-8") + line: str = lines[location.start_line - 1].decode("utf-8") if len(line) <= location.end_col: return None @@ -157,7 +161,7 @@ def main() -> None: required=False ) def __annotate( - project: str, revision: tp.Optional[str], outfile: tp.IO + project: str, revision: tp.Optional[str], outfile: tp.TextIO ) -> None: initialize_cli_tool() @@ -187,9 +191,9 @@ def __annotate( last_annotations[feature_name] = FeatureAnnotation( feature_name, location, commit_hash ) - last_annotation_targets[feature_name] = __get_location_content( - first_commit, location - ) + target = __get_location_content(first_commit, location) + assert target is not None, "Target must not be None" + last_annotation_targets[feature_name] = target tracked_features[feature_name] = [] LOG.debug( f"Tracking {feature_name} @ {location}: {last_annotation_targets[feature_name]}" @@ -221,9 +225,9 @@ def __annotate( last_annotations[feature] = FeatureAnnotation( feature, new_location, commit_hash ) - last_annotation_targets[feature] = __get_location_content( - commit, new_location - ) + new_target = __get_location_content(commit, new_location) + assert new_target is not None, "Target must not be None" + last_annotation_targets[feature] = new_target LOG.debug( f"Tracking {feature} @ {new_location}: {last_annotation_targets[feature]}" ) From 48a0418f64ee734afecee69a3e10ac32eab9ed97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6hm?= Date: Mon, 18 Mar 2024 17:25:28 +0100 Subject: [PATCH 5/5] Adhere to column limit --- varats/varats/tools/driver_feature.py | 28 ++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/varats/varats/tools/driver_feature.py b/varats/varats/tools/driver_feature.py index fc3403fa2..b0a38da34 100644 --- a/varats/varats/tools/driver_feature.py +++ b/varats/varats/tools/driver_feature.py @@ -66,12 +66,22 @@ def parse_string( def to_xml(self) -> str: """Convert the location to SPLConqueror feature model format.""" xml = f"{self.file}\n" - xml += f"{self.start_line}{self.start_col}\n" - xml += f"{self.end_line}{self.end_col}\n" + xml += ( + f"{self.start_line}" + f"{self.start_col}\n" + ) + xml += ( + f"{self.end_line}" + f"{self.end_col}\n" + ) return xml def __str__(self) -> str: - return f"{self.file} {self.start_line}:{self.start_col} {self.end_line}:{self.end_col}" + return ( + f"{self.file} " + f"{self.start_line}:{self.start_col} " + f"{self.end_line}:{self.end_col}" + ) class FeatureAnnotation: @@ -119,7 +129,8 @@ def __prompt_location( return tp.cast( Location, click.prompt( - f"Enter location for feature {feature_name} @ {commit_hash.short_hash}", + f"Enter location for feature " + f"{feature_name} @ {commit_hash.short_hash}", value_proc=parse_location ) ) @@ -196,7 +207,8 @@ def __annotate( last_annotation_targets[feature_name] = target tracked_features[feature_name] = [] LOG.debug( - f"Tracking {feature_name} @ {location}: {last_annotation_targets[feature_name]}" + f"Tracking {feature_name} @ {location}: " + f"{last_annotation_targets[feature_name]}" ) for commit in walker: @@ -207,7 +219,8 @@ def __annotate( current_target = __get_location_content(commit, annotation.location) if current_target != last_annotation_targets[feature]: LOG.debug( - f"{feature}: {current_target} != {last_annotation_targets[feature]}" + f"{feature}: " + f"{current_target} != {last_annotation_targets[feature]}" ) # set removed field for annotation and store it tracked_features[feature].append( @@ -229,7 +242,8 @@ def __annotate( assert new_target is not None, "Target must not be None" last_annotation_targets[feature] = new_target LOG.debug( - f"Tracking {feature} @ {new_location}: {last_annotation_targets[feature]}" + f"Tracking {feature} @ {new_location}: " + f"{last_annotation_targets[feature]}" ) # store remaining annotations