From 87131ce0e667c03bd193bd7e48723d39138f4337 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20R=C3=BCdiger=20Steuer?=
 <s8sisteu@stud.uni-saarland.de>
Date: Fri, 29 Sep 2023 17:34:34 +0200
Subject: [PATCH] Reformat feature blame report and plots for pre-commit hooks

---
 .../data/reports/feature_blame_report.py      | 175 ++++----
 varats/varats/plots/feature_blame_plots.py    | 412 ++++++++++--------
 2 files changed, 326 insertions(+), 261 deletions(-)

diff --git a/varats/varats/data/reports/feature_blame_report.py b/varats/varats/data/reports/feature_blame_report.py
index e0e5d5e93..09903880e 100644
--- a/varats/varats/data/reports/feature_blame_report.py
+++ b/varats/varats/data/reports/feature_blame_report.py
@@ -26,7 +26,8 @@ class StructuralCommitFeatureInteraction:
     occurs in."""
 
     def __init__(
-        self, num_instructions: int, features: tp.List[str], commit: CommitRepoPair
+        self, num_instructions: int, features: tp.List[str],
+        commit: CommitRepoPair
     ) -> None:
         self.__num_instructions = num_instructions
         self.__features = features
@@ -46,7 +47,9 @@ def create_commit_feature_interaction(
             (raw_inst_entry["commit-repo-pair"])["commit"],
             (raw_inst_entry["commit-repo-pair"])["repository"],
         )
-        return StructuralCommitFeatureInteraction(num_instructions, features, commit)
+        return StructuralCommitFeatureInteraction(
+            num_instructions, features, commit
+        )
 
     @property
     def num_instructions(self) -> int:
@@ -68,7 +71,9 @@ class FeatureBlameReportMetaData(FeatureAnalysisReportMetaData):
     pass
 
 
-class StructuralFeatureBlameReport(BaseReport, shorthand="SFBR", file_type="yaml"):
+class StructuralFeatureBlameReport(
+    BaseReport, shorthand="SFBR", file_type="yaml"
+):
     """Data class that gives access to a loaded structural feature blame
     report."""
 
@@ -82,20 +87,17 @@ def __init__(self, path: Path) -> None:
             version_header.raise_if_version_is_less_than(1)
 
             self.__meta_data = (
-                FeatureBlameReportMetaData.create_feature_analysis_report_meta_data(
-                    next(documents)
-                )
+                FeatureBlameReportMetaData.
+                create_feature_analysis_report_meta_data(next(documents))
             )
 
             raw_feature_blame_report = next(documents)
 
             self.__commit_feature_interactions = [
-                StructuralCommitFeatureInteraction.create_commit_feature_interaction(
-                    cfi
-                )
+                StructuralCommitFeatureInteraction.
+                create_commit_feature_interaction(cfi)
                 for cfi in raw_feature_blame_report[
-                    "structural-commit-feature-interactions"
-                ]
+                    "structural-commit-feature-interactions"]
             ]
 
     @property
@@ -112,13 +114,14 @@ def commit_feature_interactions(
         return self.__commit_feature_interactions
 
 
-def generate_feature_scfi_data(SFBR: StructuralFeatureBlameReport) -> pd.DataFrame:
+def generate_feature_scfi_data(
+    SFBR: StructuralFeatureBlameReport
+) -> pd.DataFrame:
     # {ftr:
     # [[inter_commits, inter_commits_nd1, inter_commits_nd>1], [def_ftr_size, pot_ftr_size]]}
-    features_cfi_data: tp.Dict[
-        str,
-        tp.List[tp.List[tp.Set[str], tp.Set[str], tp.Set[str]], tp.List[int, int]],
-    ] = {}
+    features_cfi_data: tp.Dict[str, tp.List[tp.List[tp.Set[str], tp.Set[str],
+                                                    tp.Set[str]],
+                                            tp.List[int, int]],] = {}
     for SCFI in SFBR.commit_feature_interactions:
         commit_hash = ShortCommitHash(SCFI.commit.commit_hash).hash
         nesting_degree: int = len(SCFI.features)
@@ -135,17 +138,14 @@ def generate_feature_scfi_data(SFBR: StructuralFeatureBlameReport) -> pd.DataFra
             elif entry[0][1].isdisjoint([commit_hash]):
                 entry[0][2].add(commit_hash)
             features_cfi_data.update({feature: entry})
-    rows = [
-        [
-            feature_data[0],
-            len(feature_data[1][0][0]),
-            len(feature_data[1][0][1]),
-            len(feature_data[1][0][2]),
-            feature_data[1][1][0],
-            feature_data[1][1][1],
-        ]
-        for feature_data in features_cfi_data.items()
-    ]
+    rows = [[
+        feature_data[0],
+        len(feature_data[1][0][0]),
+        len(feature_data[1][0][1]),
+        len(feature_data[1][0][2]),
+        feature_data[1][1][0],
+        feature_data[1][1][1],
+    ] for feature_data in features_cfi_data.items()]
     return pd.DataFrame(
         rows,
         columns=[
@@ -160,7 +160,8 @@ def generate_feature_scfi_data(SFBR: StructuralFeatureBlameReport) -> pd.DataFra
 
 
 def generate_feature_author_scfi_data(
-    SFBR: StructuralFeatureBlameReport, project_gits: tp.Dict[str, pygit2.Repository]
+    SFBR: StructuralFeatureBlameReport, project_gits: tp.Dict[str,
+                                                              pygit2.Repository]
 ) -> pd.DataFrame:
     # {feature: (authors, size)}
     features_cfi_author_data: tp.Dict[str, tp.Tuple(tp.Set[str], int)] = {}
@@ -173,18 +174,17 @@ def generate_feature_author_scfi_data(
         for feature in SCFI.features:
             entry = features_cfi_author_data.get(feature)
             if not entry:
-                features_cfi_author_data.update(
-                    {feature: (set([author]), SCFI.num_instructions)}
-                )
+                features_cfi_author_data.update({
+                    feature: (set([author]), SCFI.num_instructions)
+                })
             else:
                 entry[0].add(author)
-                features_cfi_author_data.update(
-                    {feature: (entry[0], entry[1] + SCFI.num_instructions)}
-                )
-    rows = [
-        [feature_data[0], len(feature_data[1][0]), feature_data[1][1]]
-        for feature_data in features_cfi_author_data.items()
-    ]
+                features_cfi_author_data.update({
+                    feature: (entry[0], entry[1] + SCFI.num_instructions)
+                })
+    rows = [[feature_data[0],
+             len(feature_data[1][0]), feature_data[1][1]]
+            for feature_data in features_cfi_author_data.items()]
     return pd.DataFrame(
         rows, columns=["feature", "num_implementing_authors", "feature_size"]
     )
@@ -278,7 +278,9 @@ def commits(self) -> tp.List[CommitRepoPair]:
         return self.__commits
 
 
-class DataflowFeatureBlameReport(BaseReport, shorthand="DFBR", file_type="yaml"):
+class DataflowFeatureBlameReport(
+    BaseReport, shorthand="DFBR", file_type="yaml"
+):
     """Data class that gives access to a loaded dataflow feature blame
     report."""
 
@@ -292,18 +294,16 @@ def __init__(self, path: Path) -> None:
             version_header.raise_if_version_is_less_than(1)
 
             self.__meta_data = (
-                FeatureBlameReportMetaData.create_feature_analysis_report_meta_data(
-                    next(documents)
-                )
+                FeatureBlameReportMetaData.
+                create_feature_analysis_report_meta_data(next(documents))
             )
 
             raw_feature_blame_report = next(documents)
 
             self.__commit_feature_interactions = [
-                DataflowCommitFeatureInteraction.create_commit_feature_interaction(cfi)
-                for cfi in raw_feature_blame_report[
-                    "dataflow-commit-feature-interactions"
-                ]
+                DataflowCommitFeatureInteraction.
+                create_commit_feature_interaction(cfi) for cfi in
+                raw_feature_blame_report["dataflow-commit-feature-interactions"]
             ]
 
     @property
@@ -313,7 +313,9 @@ def meta_data(self) -> FeatureAnalysisReportMetaData:
         return self.__meta_data
 
     @property
-    def commit_feature_interactions(self) -> tp.List[DataflowCommitFeatureInteraction]:
+    def commit_feature_interactions(
+        self
+    ) -> tp.List[DataflowCommitFeatureInteraction]:
         """Return all dataflow-based cfis."""
         return self.__commit_feature_interactions
 
@@ -338,10 +340,10 @@ def get_commits_dataflow_interacting_features(
     DFBR: DataflowFeatureBlameReport,
 ) -> tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], tp.Set[str]]]:
     # [hash, ([all_interacting_features], [inside_df], [outside_df])]
-    dfi_commit: tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], tp.Set[str]]] = {}
+    dfi_commit: tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str],
+                                      tp.Set[str]]] = {}
     commits_structurally_interacting_features: tp.Dict[
-        str, tp.Set[str]
-    ] = get_commits_structurally_interacting_features(SFBR)
+        str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR)
 
     for DCFI in DFBR.commit_feature_interactions:
         feature = DCFI.feature
@@ -369,13 +371,11 @@ def get_features_dataflow_affecting_commits(
     SFBR: StructuralFeatureBlameReport, DFBR: DataflowFeatureBlameReport
 ) -> tp.Dict[str, tp.Tuple[tp.Set[CommitRepoPair], tp.Set[CommitRepoPair]]]:
     # {feature, ([interacting_commits_outside], [interacting_commits_inside])}
-    dci_feature: tp.Dict[
-        str, tp.Tuple[tp.Set[CommitRepoPair], tp.Set[CommitRepoPair]]
-    ] = {}
+    dci_feature: tp.Dict[str, tp.Tuple[tp.Set[CommitRepoPair],
+                                       tp.Set[CommitRepoPair]]] = {}
 
     commits_structurally_interacting_with_features: tp.Dict[
-        str, tp.Set[str]
-    ] = get_commits_structurally_interacting_features(SFBR)
+        str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR)
 
     for DCFI in DFBR.commit_feature_interactions:
         feature = DCFI.feature
@@ -409,15 +409,12 @@ def generate_commit_specific_dcfi_data(
     # [hash, ([all_interacting_features], [inside_df], [outside_df])]
     dfi_commit = get_commits_dataflow_interacting_features(SFBR, DFBR)
 
-    rows_commit_dfi = [
-        [
-            commit_data[0],
-            len(commit_data[1][0]),
-            len(commit_data[1][1]),
-            len(commit_data[1][2]),
-        ]
-        for commit_data in dfi_commit.items()
-    ]
+    rows_commit_dfi = [[
+        commit_data[0],
+        len(commit_data[1][0]),
+        len(commit_data[1][1]),
+        len(commit_data[1][2]),
+    ] for commit_data in dfi_commit.items()]
     counter = 0
     for _ in range(0, num_commits - len(dfi_commit)):
         rows_commit_dfi.append([f"fake_hash{counter}", 0, 0, 0])
@@ -439,8 +436,7 @@ def generate_general_commit_dcfi_data(
 ) -> pd.DataFrame:
     row = []
     commits_structurally_interacting_features: tp.Dict[
-        str, tp.Set[str]
-    ] = get_commits_structurally_interacting_features(SFBR)
+        str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR)
     num_structurally_interacting_commits = len(
         commits_structurally_interacting_features.values()
     )
@@ -452,7 +448,8 @@ def generate_general_commit_dcfi_data(
     interacting_structurally_and_through_dataflow = 0
     num_structural_interactions = 0
     # check for every structural CFI, if its respective commit and feature also interact through dataflow
-    for commit_hash, features in commits_structurally_interacting_features.items():
+    for commit_hash, features in commits_structurally_interacting_features.items(
+    ):
         commit_hash: str = ShortCommitHash(commit_hash).hash
         entry = commits_dataflow_interacting_features.get(commit_hash)
         num_structural_interactions += len(features)
@@ -461,11 +458,14 @@ def generate_general_commit_dcfi_data(
                 interacting_structurally_and_through_dataflow += 1
 
     row.append(
-        interacting_structurally_and_through_dataflow
-        / num_structural_interactions
+        interacting_structurally_and_through_dataflow /
+        num_structural_interactions
     )
     print("likelihood_dataflow_interaction_when_interacting_structurally")
-    print(interacting_structurally_and_through_dataflow / num_structural_interactions)
+    print(
+        interacting_structurally_and_through_dataflow /
+        num_structural_interactions
+    )
     print("")
 
     columns = [
@@ -483,17 +483,13 @@ def generate_feature_dcfi_data(
 
     feature_scfi_data = generate_feature_scfi_data(SFBR)
 
-    rows_feature_dci = [
-        [
-            feature_data[0],
-            feature_scfi_data.loc[feature_scfi_data["feature"] == feature_data[0]][
-                "pot_feature_size"
-            ].to_numpy()[0],
-            len(feature_data[1][0]),
-            len(feature_data[1][1]),
-        ]
-        for feature_data in dci_feature.items()
-    ]
+    rows_feature_dci = [[
+        feature_data[0],
+        feature_scfi_data.loc[feature_scfi_data["feature"] == feature_data[0]]
+        ["pot_feature_size"].to_numpy()[0],
+        len(feature_data[1][0]),
+        len(feature_data[1][1]),
+    ] for feature_data in dci_feature.items()]
 
     columns = [
         "feature",
@@ -536,16 +532,13 @@ def generate_feature_author_dcfi_data(
                 continue
             interacting_authors_inside.add(author)
 
-        rows_feature_author_dci.append(
-            [
-                feature,
-                feature_scfi_data.loc[feature_scfi_data["feature"] == feature][
-                    "feature_size"
-                ].to_numpy()[0],
-                len(interacting_authors_outside),
-                len(interacting_authors_inside),
-            ]
-        )
+        rows_feature_author_dci.append([
+            feature,
+            feature_scfi_data.loc[feature_scfi_data["feature"] == feature]
+            ["feature_size"].to_numpy()[0],
+            len(interacting_authors_outside),
+            len(interacting_authors_inside),
+        ])
 
     columns = [
         "feature",
diff --git a/varats/varats/plots/feature_blame_plots.py b/varats/varats/plots/feature_blame_plots.py
index cae98581d..6e086460e 100644
--- a/varats/varats/plots/feature_blame_plots.py
+++ b/varats/varats/plots/feature_blame_plots.py
@@ -57,15 +57,21 @@ def get_structural_report_files_for_project(
     return report_files
 
 
-def get_structural_feature_data_for_case_study(case_study: CaseStudy) -> pd.DataFrame:
-    report_file = get_structural_report_files_for_project(case_study.project_name)[0]
+def get_structural_feature_data_for_case_study(
+    case_study: CaseStudy
+) -> pd.DataFrame:
+    report_file = get_structural_report_files_for_project(
+        case_study.project_name
+    )[0]
     data_frame: pd.DataFrame = pd.DataFrame()
     report = load_structural_feature_blame_report(report_file)
     data_frame = generate_feature_scfi_data(report)
     return data_frame
 
 
-def get_structural_commit_data_for_case_study(case_study: CaseStudy) -> pd.DataFrame:
+def get_structural_commit_data_for_case_study(
+    case_study: CaseStudy
+) -> pd.DataFrame:
     project_name = case_study.project_name
 
     report_file = get_structural_report_files_for_project(project_name)[0]
@@ -77,8 +83,7 @@ def get_structural_commit_data_for_case_study(case_study: CaseStudy) -> pd.DataF
         repo_name: calc_repo_code_churn(
             get_local_project_git_path(project_name, repo_name),
             ChurnConfig.create_c_style_languages_config(),
-        )
-        for repo_name, _ in repo_lookup.items()
+        ) for repo_name, _ in repo_lookup.items()
     }
 
     data_frame = generate_commit_scfi_data(report, code_churn_lookup)
@@ -88,11 +93,11 @@ def get_structural_commit_data_for_case_study(case_study: CaseStudy) -> pd.DataF
 
 ######## STRUCTURAL #########
 
-
 ######## FEATURES #########
 
 
 class FeatureSFBRPlot(Plot, plot_name="feature_sfbr_plot"):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
 
@@ -107,31 +112,37 @@ def plot(self, view_mode: bool) -> None:
 
             stacked_feature_data = pd.DataFrame(
                 {
-                    "Interacting with ND1": data["num_interacting_commits_nd1"].values,
-                    "Interacting with ND>1": data[
-                        "num_interacting_commits_nd>1"
-                    ].values,
+                    "Interacting with ND1":
+                        data["num_interacting_commits_nd1"].values,
+                    "Interacting with ND>1":
+                        data["num_interacting_commits_nd>1"].values,
                 },
                 index=index,
             )
 
             stacked_feature_data.plot.bar(stacked=True, width=0.95, ax=axs[0])
             axs[0].set_xlabel("Features" if first else "", size="13")
-            axs[0].set_ylabel("Num Interacting Commits" if first else "", size="13")
+            axs[0].set_ylabel(
+                "Num Interacting Commits" if first else "", size="13"
+            )
             axs[0].set_title(case_study.project_name, size="16")
 
             data = data.sort_values(by=["def_feature_size"])
 
             stacked_feature_size_data = pd.DataFrame(
                 {
-                    "Definite Feature Size": data["def_feature_size"].values,
-                    "Potential Feature Size": data["pot_feature_size"].values
-                    - data["def_feature_size"].values,
+                    "Definite Feature Size":
+                        data["def_feature_size"].values,
+                    "Potential Feature Size":
+                        data["pot_feature_size"].values -
+                        data["def_feature_size"].values,
                 },
                 index=index,
             )
 
-            stacked_feature_size_data.plot.bar(stacked=True, width=0.95, ax=axs[1])
+            stacked_feature_size_data.plot.bar(
+                stacked=True, width=0.95, ax=axs[1]
+            )
             axs[1].set_xlabel("")
             axs[1].set_ylabel("Feature Size" if first else "", size="13")
 
@@ -142,11 +153,16 @@ def plot(self, view_mode: bool) -> None:
                 ax=axs[2],
             )
             sns.regplot(
-                data=data, x="pot_feature_size", y="num_interacting_commits", ax=axs[2]
+                data=data,
+                x="pot_feature_size",
+                y="num_interacting_commits",
+                ax=axs[2]
             )
 
             axs[2].set_xlabel("Feature Size" if first else "", size="13")
-            axs[2].set_ylabel("Num Interacting Commits" if first else "", size="13")
+            axs[2].set_ylabel(
+                "Num Interacting Commits" if first else "", size="13"
+            )
 
             first = False
 
@@ -156,6 +172,7 @@ class FeatureSFBRPlotGenerator(
     generator_name="feature-sfbr-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
         return [
@@ -169,6 +186,7 @@ def generate(self) -> tp.List[Plot]:
 
 
 class CommitSpecificSFBRPlot(Plot, plot_name="commit_specific_sfbr_plot"):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
 
@@ -180,8 +198,12 @@ def plot(self, view_mode: bool) -> None:
                     continue
                 case_study = case_studies[case_study_counter]
 
-                commit_data = get_structural_commit_data_for_case_study(case_study)
-                commit_data = commit_data.sort_values(by=["num_interacting_features"])
+                commit_data = get_structural_commit_data_for_case_study(
+                    case_study
+                )
+                commit_data = commit_data.sort_values(
+                    by=["num_interacting_features"]
+                )
 
                 filter_lrg_commits = apply_tukeys_fence(
                     commit_data, column="commit_size", k=1.5
@@ -190,24 +212,22 @@ def plot(self, view_mode: bool) -> None:
                 commit_data = commit_data["num_interacting_features"]
 
                 interacting_with_nd1 = [
-                    commit_data[index][0] if index in filter_lrg_commits.index else 0
+                    commit_data[index][0]
+                    if index in filter_lrg_commits.index else 0
                     for index in commit_data.index
                 ]
                 interacting_with_at_least_nd2 = [
                     sum(commit_data[index][1:])
-                    if index in filter_lrg_commits.index
-                    else 0
+                    if index in filter_lrg_commits.index else 0
                     for index in commit_data.index
                 ]
                 interacting_with_nd1_lrg_commit = [
-                    0 if index in filter_lrg_commits.index else commit_data[index][0]
-                    for index in commit_data.index
+                    0 if index in filter_lrg_commits.index else
+                    commit_data[index][0] for index in commit_data.index
                 ]
                 interacting_with_at_least_nd2_lrg_commit = [
-                    0
-                    if index in filter_lrg_commits.index
-                    else sum(commit_data[index][1:])
-                    for index in commit_data.index
+                    0 if index in filter_lrg_commits.index else
+                    sum(commit_data[index][1:]) for index in commit_data.index
                 ]
 
                 rng = range(len(commit_data))
@@ -218,7 +238,10 @@ def plot(self, view_mode: bool) -> None:
                     bottom=interacting_with_nd1,
                 )
                 ax.bar(
-                    rng, interacting_with_nd1_lrg_commit, alpha=0.65, color="tab:blue"
+                    rng,
+                    interacting_with_nd1_lrg_commit,
+                    alpha=0.65,
+                    color="tab:blue"
                 )
                 ax.bar(
                     rng,
@@ -235,14 +258,12 @@ def plot(self, view_mode: bool) -> None:
                     labels=[str(i * step) for i in range(6)],
                 )
                 ax.set_title(case_study.project_name)
-                ax.legend(
-                    [
-                        "Interacting with ND1",
-                        "Interacting with ND>1",
-                        "ND1, Large Commit",
-                        "ND>1, Large Commit",
-                    ]
-                )
+                ax.legend([
+                    "Interacting with ND1",
+                    "Interacting with ND>1",
+                    "ND1, Large Commit",
+                    "ND>1, Large Commit",
+                ])
                 case_study_counter += 1
 
 
@@ -251,6 +272,7 @@ class CommitSpecificSFBRPlotGenerator(
     generator_name="commit-specific-sfbr-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
         return [
@@ -265,21 +287,20 @@ def get_stacked_proportional_commit_structural_data(
 ) -> pd.DataFrame:
     rows = []
     for case_study in case_studies:
-        number_active_commits = num_active_commits_cs.get(case_study.project_name)
+        number_active_commits = num_active_commits_cs.get(
+            case_study.project_name
+        )
         data_commits = get_general_commit_dataflow_data_for_case_study(
             case_study, number_active_commits
         )
         fraction_commits_implementing_features = data_commits[
-            "fraction_commits_structurally_interacting_with_features"
-        ][0]
+            "fraction_commits_structurally_interacting_with_features"][0]
 
-        rows.append(
-            [
-                case_study.project_name,
-                fraction_commits_implementing_features,
-                1 - fraction_commits_implementing_features,
-            ]
-        )
+        rows.append([
+            case_study.project_name,
+            fraction_commits_implementing_features,
+            1 - fraction_commits_implementing_features,
+        ])
 
     return pd.DataFrame(
         data=rows,
@@ -294,6 +315,7 @@ def get_stacked_proportional_commit_structural_data(
 class CommitProportionalStructuralPlot(
     Plot, plot_name="commit_proportional_structural_plot"
 ):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
 
@@ -303,6 +325,7 @@ class CommitProportionalStructuralPlotGenerator(
     generator_name="commit-proportional-structural-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
         return [
@@ -315,7 +338,9 @@ def generate(self) -> tp.List[Plot]:
 ######## DATAFLOW #########
 
 
-def get_dataflow_report_files_for_project(project_name: str) -> tp.List[ReportFilepath]:
+def get_dataflow_report_files_for_project(
+    project_name: str
+) -> tp.List[ReportFilepath]:
     fnf = lambda x: not "DFBR" in x
     report_files: tp.List[ReportFilepath] = get_processed_revisions_files(
         project_name=project_name,
@@ -327,7 +352,9 @@ def get_dataflow_report_files_for_project(project_name: str) -> tp.List[ReportFi
     return report_files
 
 
-def get_both_reports_for_case_study(case_study: CaseStudy) -> tp.Tuple[SFBR, DFBR]:
+def get_both_reports_for_case_study(
+    case_study: CaseStudy
+) -> tp.Tuple[SFBR, DFBR]:
     structural_report_file = get_structural_report_files_for_project(
         case_study.project_name
     )[0]
@@ -344,7 +371,9 @@ def get_general_commit_dataflow_data_for_case_study(
     case_study: CaseStudy, number_active_commits
 ) -> pd.DataFrame:
     SFBR, DFBR = get_both_reports_for_case_study(case_study)
-    data_frame = generate_general_commit_dcfi_data(SFBR, DFBR, number_active_commits)
+    data_frame = generate_general_commit_dcfi_data(
+        SFBR, DFBR, number_active_commits
+    )
 
     return data_frame
 
@@ -354,7 +383,9 @@ def get_commit_specific_dataflow_data_for_case_study(
     number_active_commits: int,
 ) -> pd.DataFrame:
     SFBR, DFBR = get_both_reports_for_case_study(case_study)
-    data_frame = generate_commit_specific_dcfi_data(SFBR, DFBR, number_active_commits)
+    data_frame = generate_commit_specific_dcfi_data(
+        SFBR, DFBR, number_active_commits
+    )
 
     return data_frame
 
@@ -368,7 +399,9 @@ def get_combined_stacked_proportional_commit_dataflow_data(
 ) -> pd.DataFrame:
     rows = []
     for case_study in case_studies:
-        number_active_commits = num_active_commits_cs.get(case_study.project_name)
+        number_active_commits = num_active_commits_cs.get(
+            case_study.project_name
+        )
         dataflow_data = get_commit_specific_dataflow_data_for_case_study(
             case_study, number_active_commits
         )
@@ -385,20 +418,18 @@ def get_combined_stacked_proportional_commit_dataflow_data(
             num_struct_int_commits / number_active_commits
         )
 
-        rows.extend(
+        rows.extend([
             [
-                [
-                    case_study.project_name,
-                    fraction_commits_with_df_int * 100,
-                    "Dataflow",
-                ],
-                [
-                    case_study.project_name,
-                    fraction_commits_with_struct_int * 100,
-                    "Structural",
-                ],
-            ]
-        )
+                case_study.project_name,
+                fraction_commits_with_df_int * 100,
+                "Dataflow",
+            ],
+            [
+                case_study.project_name,
+                fraction_commits_with_struct_int * 100,
+                "Structural",
+            ],
+        ])
 
     return pd.DataFrame(
         data=rows,
@@ -416,7 +447,9 @@ def get_specific_stacked_proportional_commit_dataflow_data(
 ) -> pd.DataFrame:
     rows = []
     for case_study in case_studies:
-        number_active_commits = num_active_commits_cs.get(case_study.project_name)
+        number_active_commits = num_active_commits_cs.get(
+            case_study.project_name
+        )
         data_commits = get_commit_specific_dataflow_data_for_case_study(
             case_study, number_active_commits
         )
@@ -426,38 +459,30 @@ def get_specific_stacked_proportional_commit_dataflow_data(
         )
 
         commits_inside_df = data_commits.loc[
-            data_commits["num_interacting_features_inside_df"] > 0
-        ]
+            data_commits["num_interacting_features_inside_df"] > 0]
         commits_only_inside_df = commits_inside_df.loc[
-            commits_inside_df["num_interacting_features_outside_df"] == 0
-        ]
+            commits_inside_df["num_interacting_features_outside_df"] == 0]
         fraction_commits_only_inside_df = (
             len(commits_only_inside_df) / num_commits_with_df_int
         )
 
         commits_outside_df = data_commits.loc[
-            data_commits["num_interacting_features_outside_df"] > 0
-        ]
+            data_commits["num_interacting_features_outside_df"] > 0]
         commits_only_outside_df = commits_outside_df.loc[
-            commits_outside_df["num_interacting_features_inside_df"] == 0
-        ]
+            commits_outside_df["num_interacting_features_inside_df"] == 0]
         fraction_commits_only_outside_df = (
             len(commits_only_outside_df) / num_commits_with_df_int
         )
 
-        rows.append(
-            [
-                case_study.project_name,
-                fraction_commits_only_outside_df * 100,
-                fraction_commits_only_inside_df * 100,
-                100
-                * (
-                    1
-                    - fraction_commits_only_outside_df
-                    - fraction_commits_only_inside_df
-                ),
-            ]
-        )
+        rows.append([
+            case_study.project_name,
+            fraction_commits_only_outside_df * 100,
+            fraction_commits_only_inside_df * 100,
+            100 * (
+                1 - fraction_commits_only_outside_df -
+                fraction_commits_only_inside_df
+            ),
+        ])
 
     return pd.DataFrame(
         data=rows,
@@ -470,7 +495,10 @@ def get_specific_stacked_proportional_commit_dataflow_data(
     )
 
 
-class ProportionalCommitDFBRPlot(Plot, plot_name="proportional_commit_dfbr_plot"):
+class ProportionalCommitDFBRPlot(
+    Plot, plot_name="proportional_commit_dfbr_plot"
+):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
         num_active_commits_cs: tp.Dict[str, int] = {
@@ -517,7 +545,9 @@ def plot(self, view_mode: bool) -> None:
         plt = data.set_index("Projects").plot(
             kind="bar", stacked=True, ylabel="Proportion (%)", ax=ax_1
         )
-        plt.legend(title="Dataflow Origin", loc="center left", bbox_to_anchor=(1, 0.5))
+        plt.legend(
+            title="Dataflow Origin", loc="center left", bbox_to_anchor=(1, 0.5)
+        )
         ax_1.bar_label(ax_1.containers[0], fmt="%.1f%%")
         ax_1.bar_label(ax_1.containers[1], fmt="%.1f%%")
         ax_1.set_title("Dataflow Origin for Commits")
@@ -528,6 +558,7 @@ class ProportionalCommitDFBRPlotGenerator(
     generator_name="proportional-commit-dfbr-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[case_studies] = self.plot_kwargs["case_study"]
         return [
@@ -540,7 +571,9 @@ def generate(self) -> tp.List[Plot]:
 ######## FEATURES #########
 
 
-def get_feature_dataflow_data_for_case_study(case_study: CaseStudy) -> pd.DataFrame:
+def get_feature_dataflow_data_for_case_study(
+    case_study: CaseStudy
+) -> pd.DataFrame:
     SFBRs, DFBRs = get_both_reports_for_case_study(case_study)
     data_frame = generate_feature_dcfi_data(SFBRs, DFBRs)
 
@@ -548,31 +581,32 @@ def get_feature_dataflow_data_for_case_study(case_study: CaseStudy) -> pd.DataFr
 
 
 class FeatureDFBRPlot(Plot, plot_name="feature_dfbr_plot"):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
-        fig, naxs = pyplot.subplots(nrows=len(case_studies), ncols=2, figsize=(15, 15))
+        fig, naxs = pyplot.subplots(
+            nrows=len(case_studies), ncols=2, figsize=(15, 15)
+        )
         fig.tight_layout(pad=6.5)
-        first:bool = True
+        first: bool = True
         for axs, case_study in zip(naxs, case_studies):
             data = get_feature_dataflow_data_for_case_study(case_study)
             data = data.sort_values(by=["num_interacting_commits_outside_df"])
             rows = []
             for index in data.index:
                 feature = data.at[index, "feature"]
-                rows.extend(
+                rows.extend([
                     [
-                        [
-                            feature,
-                            data.at[index, "num_interacting_commits_outside_df"],
-                            "Outside Commits",
-                        ],
-                        [
-                            feature,
-                            data.at[index, "num_interacting_commits_inside_df"],
-                            "Inside Commits",
-                        ],
-                    ]
-                )
+                        feature,
+                        data.at[index, "num_interacting_commits_outside_df"],
+                        "Outside Commits",
+                    ],
+                    [
+                        feature,
+                        data.at[index, "num_interacting_commits_inside_df"],
+                        "Inside Commits",
+                    ],
+                ])
             df = pd.DataFrame(
                 data=rows,
                 columns=["Feature", "Num Interacting Commits", "Commit Kind"],
@@ -586,9 +620,14 @@ def plot(self, view_mode: bool) -> None:
             )
             axs[0].set_title(case_study.project_name, size=15)
             axs[0].set_xlabel("Features" if first else "", size=13)
-            axs[0].set_ylabel("Num Interacting Commits" if first else "", size=13)
-            axs[0].set_xticklabels(labels=data["feature"].values, rotation= (22.5))
-            if not first: axs[0].legend_.remove()
+            axs[0].set_ylabel(
+                "Num Interacting Commits" if first else "", size=13
+            )
+            axs[0].set_xticklabels(
+                labels=data["feature"].values, rotation=(22.5)
+            )
+            if not first:
+                axs[0].legend_.remove()
 
             sns.regplot(
                 data=data,
@@ -613,7 +652,9 @@ def plot(self, view_mode: bool) -> None:
                 label="Inside Commits",
             )
             axs[1].set_xlabel("Feature Size" if first else "", size=13)
-            axs[1].set_ylabel("Num Interacting Commits" if first else "", size=13)
+            axs[1].set_ylabel(
+                "Num Interacting Commits" if first else "", size=13
+            )
             pyplot.legend(fontsize=10)
             first = False
 
@@ -623,6 +664,7 @@ class FeatureDFBRPlotGenerator(
     generator_name="feature-dfbr-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
         return [
@@ -633,14 +675,13 @@ def generate(self) -> tp.List[Plot]:
 
 
 class FeatureSizeCorrDFBRPlot(Plot, plot_name="feature_size_corr_dfbr_plot"):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
-        data = pd.concat(
-            [
-                get_feature_dataflow_data_for_case_study(case_study)
-                for case_study in case_studies
-            ]
-        )
+        data = pd.concat([
+            get_feature_dataflow_data_for_case_study(case_study)
+            for case_study in case_studies
+        ])
         print(data)
         plt = sns.regplot(
             data=data, x="feature_size", y="num_interacting_commits_outside_df"
@@ -656,6 +697,7 @@ class FeatureSizeCorrDFBRPlotGenerator(
     generator_name="feature-size-corr-dfbr-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
         return [
@@ -666,18 +708,17 @@ def generate(self) -> tp.List[Plot]:
 
 
 class FeatureDisDFBRPlot(Plot, plot_name="feature_dis_dfbr_plot"):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
         dfs = [
             get_feature_dataflow_data_for_case_study(case_study)
             for case_study in case_studies
         ]
-        data = pd.concat(
-            [
-                get_feature_dataflow_data_for_case_study(case_study)
-                for case_study in case_studies
-            ]
-        )
+        data = pd.concat([
+            get_feature_dataflow_data_for_case_study(case_study)
+            for case_study in case_studies
+        ])
         data = data.sort_values(by=["num_interacting_commits_outside_df"])
         pyplot.figure(figsize=(10.3, 6))
         ax = sns.barplot(
@@ -689,7 +730,9 @@ def plot(self, view_mode: bool) -> None:
         )
         ax.set_xlabel("Feature", size="11")
         ax.set_ylabel("Number of Interacting Outside Commits", size="12")
-        ax.set_title("Feature Commit Dataflow Interactions from Outisde", size="14")
+        ax.set_title(
+            "Feature Commit Dataflow Interactions from Outisde", size="14"
+        )
         return None
 
         fig, naxs = pyplot.subplots(2, 2, figsize=(22, 22))
@@ -709,14 +752,16 @@ def plot(self, view_mode: bool) -> None:
                     palette=["tab:blue"],
                 )
                 ax.set_xlabel("Feature", size="16")
-                ax.set_ylabel("Number of Interacting Outside Commits", size="16")
+                ax.set_ylabel(
+                    "Number of Interacting Outside Commits", size="16"
+                )
                 ax.set_title(case_study.project_name, size="22")
                 case_study_counter += 1
 
         fig.suptitle(
-            "Dataflow Interactions from Outside of Features"
-            + " for Projects "
-            + ",".join([case_study.project_name for case_study in case_studies]),
+            "Dataflow Interactions from Outside of Features" +
+            " for Projects " +
+            ",".join([case_study.project_name for case_study in case_studies]),
             size="26",
         )
 
@@ -726,6 +771,7 @@ class FeatureDisDFBRPlotGenerator(
     generator_name="feature-dis-dfbr-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
         return [
@@ -741,10 +787,14 @@ def generate(self) -> tp.List[Plot]:
 def get_structural_feature_author_data_for_case_study(
     case_study: CaseStudy,
 ) -> pd.DataFrame:
-    report_file = get_structural_report_files_for_project(case_study.project_name)[0]
+    report_file = get_structural_report_files_for_project(
+        case_study.project_name
+    )[0]
     project_gits = get_local_project_gits(case_study.project_name)
     report = load_structural_feature_blame_report(report_file)
-    data_frame: pd.DataFrame = generate_feature_author_scfi_data(report, project_gits)
+    data_frame: pd.DataFrame = generate_feature_author_scfi_data(
+        report, project_gits
+    )
 
     return data_frame
 
@@ -759,7 +809,9 @@ def get_dataflow_feature_author_data_for_case_study(
         case_study.project_name
     )[0]
     project_gits = get_local_project_gits(case_study.project_name)
-    structural_report = load_structural_feature_blame_report(structural_report_file)
+    structural_report = load_structural_feature_blame_report(
+        structural_report_file
+    )
     dataflow_report = load_dataflow_feature_blame_report(dataflow_report_file)
     data_frame: pd.DataFrame = generate_feature_author_dcfi_data(
         structural_report, dataflow_report, project_gits
@@ -774,14 +826,15 @@ def get_stacked_author_data_for_case_studies(
 ) -> pd.DataFrame:
     rows = []
 
-    max_num_interacting_authors = max(
-        [max(project_data) for project_data in projects_data]
-    )
+    max_num_interacting_authors = max([
+        max(project_data) for project_data in projects_data
+    ])
 
     for case_study, project_data in zip(case_studies, projects_data):
         count: [int] = [0 for _ in range(0, max_num_interacting_authors)]
         for num_interacting_authors in project_data:
-            count[num_interacting_authors - 1] = count[num_interacting_authors - 1] + 1
+            count[num_interacting_authors -
+                  1] = count[num_interacting_authors - 1] + 1
 
         rows.append([case_study.project_name] + count)
 
@@ -798,15 +851,22 @@ def get_stacked_author_data_for_case_studies(
     return pd.DataFrame(adj_rows, columns=["Project"] + author_columns)
 
 
-class FeatureAuthorStructDisPlot(Plot, plot_name="feature_author_struct_dis_plot"):
+class FeatureAuthorStructDisPlot(
+    Plot, plot_name="feature_author_struct_dis_plot"
+):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
 
         fig, axs = pyplot.subplots(ncols=len(case_studies), figsize=(15, 3))
         counter = 0
         for ax, case_study in zip(axs, case_studies):
-            author_data = get_structural_feature_author_data_for_case_study(case_study)
-            author_data = author_data.sort_values(by=["num_implementing_authors"])
+            author_data = get_structural_feature_author_data_for_case_study(
+                case_study
+            )
+            author_data = author_data.sort_values(
+                by=["num_implementing_authors"]
+            )
             sns.barplot(
                 data=author_data,
                 x="feature",
@@ -834,6 +894,7 @@ class FeatureAuthorStructDisPlotGenerator(
     generator_name="feature-author-struct-dis-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
 
@@ -844,16 +905,19 @@ def generate(self) -> tp.List[Plot]:
         ]
 
 
-class FeatureAuthorDataflowDisPlot(Plot, plot_name="feature_author_dataflow_dis_plot"):
+class FeatureAuthorDataflowDisPlot(
+    Plot, plot_name="feature_author_dataflow_dis_plot"
+):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
         projects_data = [
-            get_dataflow_feature_author_data_for_case_study(case_study).loc[
-                :, "interacting_authors_outside"
-            ]
-            for case_study in case_studies
+            get_dataflow_feature_author_data_for_case_study(case_study).
+            loc[:, "interacting_authors_outside"] for case_study in case_studies
         ]
-        data = get_stacked_author_data_for_case_studies(case_studies, projects_data)
+        data = get_stacked_author_data_for_case_studies(
+            case_studies, projects_data
+        )
 
         data = data.sort_values(by=["1 Author"])
         print(data)
@@ -869,6 +933,7 @@ class FeatureAuthorDataflowDisPlotGenerator(
     generator_name="feature-author-dataflow-dis-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
 
@@ -879,34 +944,36 @@ def generate(self) -> tp.List[Plot]:
         ]
 
 
-def get_combined_author_data_for_case_study(case_study: CaseStudy) -> pd.DataFrame:
-    structural_data = get_structural_feature_author_data_for_case_study(case_study)
-    structural_data = structural_data.sort_values(by=["num_implementing_authors"])
+def get_combined_author_data_for_case_study(
+    case_study: CaseStudy
+) -> pd.DataFrame:
+    structural_data = get_structural_feature_author_data_for_case_study(
+        case_study
+    )
+    structural_data = structural_data.sort_values(
+        by=["num_implementing_authors"]
+    )
     dataflow_data = get_dataflow_feature_author_data_for_case_study(case_study)
 
     combined_rows = []
     for i in structural_data.index:
         feature = structural_data.loc[i, "feature"]
-        num_implementing_authors = structural_data.loc[i, "num_implementing_authors"]
+        num_implementing_authors = structural_data.loc[
+            i, "num_implementing_authors"]
         for _ in range(num_implementing_authors):
-            combined_rows.append(
-                [
-                    feature,
-                    "Implementing Authors",  # type
-                ]
-            )
+            combined_rows.append([
+                feature,
+                "Implementing Authors",  # type
+            ])
     for i in dataflow_data.index:
         feature = dataflow_data.loc[i, "feature"]
         interacting_authors_outside = dataflow_data.loc[
-            i, "interacting_authors_outside"
-        ]
+            i, "interacting_authors_outside"]
         for _ in range(interacting_authors_outside):
-            combined_rows.append(
-                [
-                    feature,
-                    "Interacting Authors Through Outside Dataflow",  # type
-                ]
-            )
+            combined_rows.append([
+                feature,
+                "Interacting Authors Through Outside Dataflow",  # type
+            ])
 
     columns = ["feature", "interaction_type"]
 
@@ -914,6 +981,7 @@ def get_combined_author_data_for_case_study(case_study: CaseStudy) -> pd.DataFra
 
 
 class FeatureCombinedAuthorPlot(Plot, plot_name="feature_combined_author_plot"):
+
     def plot(self, view_mode: bool) -> None:
         case_study: CaseStudy = self.plot_kwargs["case_study"]
         data = get_combined_author_data_for_case_study(case_study)
@@ -933,27 +1001,30 @@ class FeatureCombinedAuthorPlotGenerator(
     generator_name="feature-combined-author-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
         return [
             FeatureCombinedAuthorPlot(
                 self.plot_config, case_study=case_study, **self.plot_kwargs
-            )
-            for case_study in case_studies
+            ) for case_study in case_studies
         ]
 
 
-class FeatureSizeCorrAuthorPlot(Plot, plot_name="feature_size_corr_author_plot"):
+class FeatureSizeCorrAuthorPlot(
+    Plot, plot_name="feature_size_corr_author_plot"
+):
+
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
-        data = pd.concat(
-            [
-                get_structural_feature_author_data_for_case_study(case_study)
-                for case_study in case_studies
-            ]
-        )
+        data = pd.concat([
+            get_structural_feature_author_data_for_case_study(case_study)
+            for case_study in case_studies
+        ])
         print(data)
-        ax = sns.regplot(data=data, x="feature_size", y="num_implementing_authors")
+        ax = sns.regplot(
+            data=data, x="feature_size", y="num_implementing_authors"
+        )
         ax.set(xlabel="Feature Size", ylabel="Number Implementing Authors")
 
 
@@ -962,6 +1033,7 @@ class FeatureSizeCorrAuthorPlotGenerator(
     generator_name="feature-size-corr-author-plot",
     options=[REQUIRE_MULTI_CASE_STUDY],
 ):
+
     def generate(self) -> tp.List[Plot]:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
         return [