From de7509221a8f3d1e5c0b9554fbf2f5af710dc4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20R=C3=BCdiger=20Steuer?= Date: Thu, 5 Oct 2023 16:03:25 +0200 Subject: [PATCH] new author plot --- .../data/reports/feature_blame_report.py | 247 ++++++++------- varats/varats/plots/feature_blame_plots.py | 281 ++++++------------ varats/varats/tables/feature_blame_tables.py | 2 - 3 files changed, 204 insertions(+), 326 deletions(-) diff --git a/varats/varats/data/reports/feature_blame_report.py b/varats/varats/data/reports/feature_blame_report.py index 09903880e..66b037ee5 100644 --- a/varats/varats/data/reports/feature_blame_report.py +++ b/varats/varats/data/reports/feature_blame_report.py @@ -26,8 +26,7 @@ class StructuralCommitFeatureInteraction: occurs in.""" def __init__( - self, num_instructions: int, features: tp.List[str], - commit: CommitRepoPair + self, num_instructions: int, features: tp.List[str], commit: CommitRepoPair ) -> None: self.__num_instructions = num_instructions self.__features = features @@ -47,9 +46,7 @@ def create_commit_feature_interaction( (raw_inst_entry["commit-repo-pair"])["commit"], (raw_inst_entry["commit-repo-pair"])["repository"], ) - return StructuralCommitFeatureInteraction( - num_instructions, features, commit - ) + return StructuralCommitFeatureInteraction(num_instructions, features, commit) @property def num_instructions(self) -> int: @@ -71,9 +68,7 @@ class FeatureBlameReportMetaData(FeatureAnalysisReportMetaData): pass -class StructuralFeatureBlameReport( - BaseReport, shorthand="SFBR", file_type="yaml" -): +class StructuralFeatureBlameReport(BaseReport, shorthand="SFBR", file_type="yaml"): """Data class that gives access to a loaded structural feature blame report.""" @@ -87,17 +82,20 @@ def __init__(self, path: Path) -> None: version_header.raise_if_version_is_less_than(1) self.__meta_data = ( - FeatureBlameReportMetaData. - create_feature_analysis_report_meta_data(next(documents)) + FeatureBlameReportMetaData.create_feature_analysis_report_meta_data( + next(documents) + ) ) raw_feature_blame_report = next(documents) self.__commit_feature_interactions = [ - StructuralCommitFeatureInteraction. 
-                create_commit_feature_interaction(cfi)
+                StructuralCommitFeatureInteraction.create_commit_feature_interaction(
+                    cfi
+                )
                 for cfi in raw_feature_blame_report[
-                    "structural-commit-feature-interactions"]
+                    "structural-commit-feature-interactions"
+                ]
             ]
 
     @property
@@ -114,14 +112,13 @@ def commit_feature_interactions(
         return self.__commit_feature_interactions
 
 
-def generate_feature_scfi_data(
-    SFBR: StructuralFeatureBlameReport
-) -> pd.DataFrame:
+def generate_feature_scfi_data(SFBR: StructuralFeatureBlameReport) -> pd.DataFrame:
     # {ftr:
     # [[inter_commits, inter_commits_nd1, inter_commits_nd>1], [def_ftr_size, pot_ftr_size]]}
-    features_cfi_data: tp.Dict[str, tp.List[tp.List[tp.Set[str], tp.Set[str],
-                                                    tp.Set[str]],
-                                            tp.List[int, int]],] = {}
+    # tp.List takes exactly one type parameter, so the two differently shaped
+    # inner lists documented above are annotated as tp.List[tp.Any]
+    features_cfi_data: tp.Dict[str, tp.List[tp.List[tp.Any]]] = {}
     for SCFI in SFBR.commit_feature_interactions:
         commit_hash = ShortCommitHash(SCFI.commit.commit_hash).hash
         nesting_degree: int = len(SCFI.features)
@@ -138,14 +135,17 @@ def generate_feature_scfi_data(
         elif entry[0][1].isdisjoint([commit_hash]):
             entry[0][2].add(commit_hash)
         features_cfi_data.update({feature: entry})
-    rows = [[
-        feature_data[0],
-        len(feature_data[1][0][0]),
-        len(feature_data[1][0][1]),
-        len(feature_data[1][0][2]),
-        feature_data[1][1][0],
-        feature_data[1][1][1],
-    ] for feature_data in features_cfi_data.items()]
+    rows = [
+        [
+            feature_data[0],
+            len(feature_data[1][0][0]),
+            len(feature_data[1][0][1]),
+            len(feature_data[1][0][2]),
+            feature_data[1][1][0],
+            feature_data[1][1][1],
+        ]
+        for feature_data in features_cfi_data.items()
+    ]
     return pd.DataFrame(
         rows,
         columns=[
@@ -159,37 +159,6 @@ def generate_feature_scfi_data(
     )
 
 
-def generate_feature_author_scfi_data(
-    SFBR: StructuralFeatureBlameReport, project_gits: tp.Dict[str,
-                                                              pygit2.Repository]
-) -> pd.DataFrame:
-    # {feature: (authors, size)}
-    features_cfi_author_data: tp.Dict[str, tp.Tuple(tp.Set[str], int)] = {}
-    for SCFI in SFBR.commit_feature_interactions:
-        commit_hash = ShortCommitHash(SCFI.commit.commit_hash).hash
-        repo = SCFI.commit.repository_name
-        author = get_author(commit_hash, project_gits.get(repo))
-        if author is None:
-            continue
-        for feature in SCFI.features:
-            entry = features_cfi_author_data.get(feature)
-            if not entry:
-                features_cfi_author_data.update({
-                    feature: (set([author]), SCFI.num_instructions)
-                })
-            else:
-                entry[0].add(author)
-                features_cfi_author_data.update({
-                    feature: (entry[0], entry[1] + SCFI.num_instructions)
-                })
-    rows = [[feature_data[0],
-             len(feature_data[1][0]), feature_data[1][1]]
-            for feature_data in features_cfi_author_data.items()]
-    return pd.DataFrame(
-        rows, columns=["feature", "num_implementing_authors", "feature_size"]
-    )
-
-
 def generate_commit_scfi_data(
     SFBR: StructuralFeatureBlameReport, code_churn_lookup
 ) -> pd.DataFrame:
@@ -278,9 +247,7 @@ def commits(self) -> tp.List[CommitRepoPair]:
         return self.__commits
 
 
-class DataflowFeatureBlameReport(
-    BaseReport, shorthand="DFBR", file_type="yaml"
-):
+class DataflowFeatureBlameReport(BaseReport, shorthand="DFBR", file_type="yaml"):
     """Data class that gives access to a loaded dataflow feature blame
     report."""
 
@@ -294,16 +261,18 @@ def __init__(self, path: Path) -> None:
             version_header.raise_if_version_is_less_than(1)
 
             self.__meta_data = (
-                FeatureBlameReportMetaData.
- create_feature_analysis_report_meta_data(next(documents)) + FeatureBlameReportMetaData.create_feature_analysis_report_meta_data( + next(documents) + ) ) raw_feature_blame_report = next(documents) self.__commit_feature_interactions = [ - DataflowCommitFeatureInteraction. - create_commit_feature_interaction(cfi) for cfi in - raw_feature_blame_report["dataflow-commit-feature-interactions"] + DataflowCommitFeatureInteraction.create_commit_feature_interaction(cfi) + for cfi in raw_feature_blame_report[ + "dataflow-commit-feature-interactions" + ] ] @property @@ -313,9 +282,7 @@ def meta_data(self) -> FeatureAnalysisReportMetaData: return self.__meta_data @property - def commit_feature_interactions( - self - ) -> tp.List[DataflowCommitFeatureInteraction]: + def commit_feature_interactions(self) -> tp.List[DataflowCommitFeatureInteraction]: """Return all dataflow-based cfis.""" return self.__commit_feature_interactions @@ -340,10 +307,10 @@ def get_commits_dataflow_interacting_features( DFBR: DataflowFeatureBlameReport, ) -> tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], tp.Set[str]]]: # [hash, ([all_interacting_features], [inside_df], [outside_df])] - dfi_commit: tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], - tp.Set[str]]] = {} + dfi_commit: tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], tp.Set[str]]] = {} commits_structurally_interacting_features: tp.Dict[ - str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR) + str, tp.Set[str] + ] = get_commits_structurally_interacting_features(SFBR) for DCFI in DFBR.commit_feature_interactions: feature = DCFI.feature @@ -371,11 +338,13 @@ def get_features_dataflow_affecting_commits( SFBR: StructuralFeatureBlameReport, DFBR: DataflowFeatureBlameReport ) -> tp.Dict[str, tp.Tuple[tp.Set[CommitRepoPair], tp.Set[CommitRepoPair]]]: # {feature, ([interacting_commits_outside], [interacting_commits_inside])} - dci_feature: tp.Dict[str, tp.Tuple[tp.Set[CommitRepoPair], - tp.Set[CommitRepoPair]]] = {} + dci_feature: tp.Dict[ + str, tp.Tuple[tp.Set[CommitRepoPair], tp.Set[CommitRepoPair]] + ] = {} commits_structurally_interacting_with_features: tp.Dict[ - str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR) + str, tp.Set[str] + ] = get_commits_structurally_interacting_features(SFBR) for DCFI in DFBR.commit_feature_interactions: feature = DCFI.feature @@ -409,12 +378,15 @@ def generate_commit_specific_dcfi_data( # [hash, ([all_interacting_features], [inside_df], [outside_df])] dfi_commit = get_commits_dataflow_interacting_features(SFBR, DFBR) - rows_commit_dfi = [[ - commit_data[0], - len(commit_data[1][0]), - len(commit_data[1][1]), - len(commit_data[1][2]), - ] for commit_data in dfi_commit.items()] + rows_commit_dfi = [ + [ + commit_data[0], + len(commit_data[1][0]), + len(commit_data[1][1]), + len(commit_data[1][2]), + ] + for commit_data in dfi_commit.items() + ] counter = 0 for _ in range(0, num_commits - len(dfi_commit)): rows_commit_dfi.append([f"fake_hash{counter}", 0, 0, 0]) @@ -436,7 +408,8 @@ def generate_general_commit_dcfi_data( ) -> pd.DataFrame: row = [] commits_structurally_interacting_features: tp.Dict[ - str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR) + str, tp.Set[str] + ] = get_commits_structurally_interacting_features(SFBR) num_structurally_interacting_commits = len( commits_structurally_interacting_features.values() ) @@ -448,8 +421,7 @@ def generate_general_commit_dcfi_data( interacting_structurally_and_through_dataflow = 0 num_structural_interactions = 0 # check for every 
structural CFI whether its respective commit and feature also interact through dataflow
-    for commit_hash, features in commits_structurally_interacting_features.items(
-    ):
+    for commit_hash, features in commits_structurally_interacting_features.items():
         commit_hash: str = ShortCommitHash(commit_hash).hash
         entry = commits_dataflow_interacting_features.get(commit_hash)
         num_structural_interactions += len(features)
@@ -458,15 +430,8 @@ def generate_general_commit_dcfi_data(
                 interacting_structurally_and_through_dataflow += 1
 
     row.append(
-        interacting_structurally_and_through_dataflow /
-        num_structural_interactions
-    )
-    print("likelihood_dataflow_interaction_when_interacting_structurally")
-    print(
-        interacting_structurally_and_through_dataflow /
-        num_structural_interactions
+        interacting_structurally_and_through_dataflow / num_structural_interactions
     )
-    print("")
 
     columns = [
         "fraction_commits_structurally_interacting_with_features",
@@ -483,13 +448,17 @@ def generate_feature_dcfi_data(
 
     feature_scfi_data = generate_feature_scfi_data(SFBR)
 
-    rows_feature_dci = [[
-        feature_data[0],
-        feature_scfi_data.loc[feature_scfi_data["feature"] == feature_data[0]]
-        ["pot_feature_size"].to_numpy()[0],
-        len(feature_data[1][0]),
-        len(feature_data[1][1]),
-    ] for feature_data in dci_feature.items()]
+    rows_feature_dci = [
+        [
+            feature_data[0],
+            feature_scfi_data.loc[feature_scfi_data["feature"] == feature_data[0]][
+                "pot_feature_size"
+            ].to_numpy()[0],
+            len(feature_data[1][0]),
+            len(feature_data[1][1]),
+        ]
+        for feature_data in dci_feature.items()
+    ]
 
     columns = [
         "feature",
@@ -500,51 +469,67 @@ def generate_feature_dcfi_data(
     return pd.DataFrame(rows_feature_dci, columns=columns)
 
 
-def generate_feature_author_dcfi_data(
+def generate_feature_author_data(
     SFBR: StructuralFeatureBlameReport,
     DFBR: DataflowFeatureBlameReport,
     project_gits: tp.Dict[str, pygit2.Repository],
 ) -> pd.DataFrame:
-    dci_feature = get_features_dataflow_affecting_commits(SFBR, DFBR)
-    # {feature, ([interacting_authors_outside], [interacting_authors_inside])}
-    rows_feature_author_dci = []
+    # authors that interact with features through inside dataflow also
+    # interact with them structurally by definition
+    # {feature: (struct_authors, outside_df_authors, size)}
+    feature_author_data: tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], int]] = {}
+    for SCFI in SFBR.commit_feature_interactions:
+        commit_hash = SCFI.commit.commit_hash
+        repo = SCFI.commit.repository_name
+        author = get_author(commit_hash, project_gits.get(repo))
+        if author is None:
+            continue
+        for feature in SCFI.features:
+            entry = feature_author_data.get(feature)
+            if not entry:
+                feature_author_data.update(
+                    {feature: (set([author]), set(), SCFI.num_instructions)}
+                )
+            else:
+                entry[0].add(author)
+                feature_author_data.update(
+                    {feature: (entry[0], entry[1], entry[2] + SCFI.num_instructions)}
+                )
 
-    feature_scfi_data = generate_feature_scfi_data(SFBR)
+    dci_feature = get_features_dataflow_affecting_commits(SFBR, DFBR)
 
     for feature_data in dci_feature.items():
         feature = feature_data[0]
+        entry = feature_author_data.get(feature)
+        if not entry:
+            continue
         interacting_commits_outside = feature_data[1][0]
-        interacting_authors_outside: tp.Set[str] = set([])
        for commit in interacting_commits_outside:
             commit_hash = commit.commit_hash
             repo = commit.repository_name
             author = get_author(commit_hash, project_gits.get(repo))
             if author is None:
                 continue
-            interacting_authors_outside.add(author)
-
-        interacting_commits_inside = feature_data[1][1]
-        interacting_authors_inside:
tp.Set[str] = set([]) - for commit in interacting_commits_inside: - commit_hash = commit.commit_hash - repo = commit.repository_name - author = get_author(commit_hash, project_gits.get(repo)) - if author is None: - continue - interacting_authors_inside.add(author) - - rows_feature_author_dci.append([ - feature, - feature_scfi_data.loc[feature_scfi_data["feature"] == feature] - ["feature_size"].to_numpy()[0], - len(interacting_authors_outside), - len(interacting_authors_inside), - ]) - - columns = [ - "feature", - "feature_size", - "interacting_authors_outside", - "interacting_authors_inside", + entry[1].add(author) + feature_author_data.update({feature: (entry[0], entry[1], entry[2])}) + + rows = [ + [ + feature_data[0], + len(feature_data[1][0]), + len(feature_data[1][1]), + len(feature_data[1][1].difference(feature_data[1][0])), + feature_data[1][2], + ] + for feature_data in feature_author_data.items() ] - return pd.DataFrame(rows_feature_author_dci, columns=columns) + return pd.DataFrame( + data=rows, + columns=[ + "feature", + "struct_authors", + "df_authors", + "unique_df_authors", + "feature_size", + ], + ) diff --git a/varats/varats/plots/feature_blame_plots.py b/varats/varats/plots/feature_blame_plots.py index ef87832d0..9fd010d78 100644 --- a/varats/varats/plots/feature_blame_plots.py +++ b/varats/varats/plots/feature_blame_plots.py @@ -20,8 +20,7 @@ generate_commit_specific_dcfi_data, generate_general_commit_dcfi_data, generate_feature_dcfi_data, - generate_feature_author_scfi_data, - generate_feature_author_dcfi_data, + generate_feature_author_data, ) from varats.jupyterhelper.file import ( load_structural_feature_blame_report, @@ -703,18 +702,7 @@ def generate(self) -> tp.List[Plot]: ########## AUTHORS ########### -def get_structural_feature_author_data_for_case_study( - case_study: CaseStudy, -) -> pd.DataFrame: - report_file = get_structural_report_files_for_project(case_study.project_name)[0] - project_gits = get_local_project_gits(case_study.project_name) - report = load_structural_feature_blame_report(report_file) - data_frame: pd.DataFrame = generate_feature_author_scfi_data(report, project_gits) - - return data_frame - - -def get_dataflow_feature_author_data_for_case_study( +def get_feature_author_data_for_case_study( case_study: CaseStudy, ) -> pd.DataFrame: structural_report_file = get_structural_report_files_for_project( @@ -726,211 +714,118 @@ def get_dataflow_feature_author_data_for_case_study( project_gits = get_local_project_gits(case_study.project_name) structural_report = load_structural_feature_blame_report(structural_report_file) dataflow_report = load_dataflow_feature_blame_report(dataflow_report_file) - data_frame: pd.DataFrame = generate_feature_author_dcfi_data( + data_frame: pd.DataFrame = generate_feature_author_data( structural_report, dataflow_report, project_gits ) return data_frame -def get_stacked_author_data_for_case_studies( - case_studies: tp.List[CaseStudy], - projects_data, -) -> pd.DataFrame: - rows = [] - - max_num_interacting_authors = max( - [max(project_data) for project_data in projects_data] - ) - - for case_study, project_data in zip(case_studies, projects_data): - count: [int] = [0 for _ in range(0, max_num_interacting_authors)] - for num_interacting_authors in project_data: - count[num_interacting_authors - 1] = count[num_interacting_authors - 1] + 1 - - rows.append([case_study.project_name] + count) - - author_columns, adj_rows = ( - [], - [[case_study.project_name] for case_study in case_studies], - ) - for i in range(1, 
max_num_interacting_authors + 1):
-        s = np.sum([int(rows[j][i]) for j in range(0, len(case_studies))])
-        if s > 0:
-            author_columns.append(str(i) + " Author" + ("s" if i > 1 else ""))
-            for j in range(0, len(case_studies)):
-                adj_rows[j].append(rows[j][i])
-    return pd.DataFrame(adj_rows, columns=["Project"] + author_columns)
-
-
-class FeatureAuthorStructDisPlot(Plot, plot_name="feature_author_struct_dis_plot"):
+class AuthorCFIPlot(Plot, plot_name="author_cfi_plot"):
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
+        fig, naxs = pyplot.subplots(nrows=len(case_studies), ncols=2, figsize=(15, 15))
+        grid = pyplot.GridSpec(len(case_studies), 2)
+        fig.tight_layout(pad=6.5)
+        row: int = 1
+        # hand-tuned data coordinates for the per-panel correlation labels,
+        # one entry per case study
+        corr_x_pos = [0, 600, 20, 15]
+        corr_y_pos = [(1.9, 1.83), (1.8, 1.2), (2.5, 2.35), (5.6, 5.2)]
+        for axs, case_study in zip(naxs, case_studies):
+            create_subtitle(fig, grid[row - 1, ::], case_study.project_name)
+            data = get_feature_author_data_for_case_study(case_study)
+            data = data.sort_values(by=["feature_size"])
 
-        fig, axs = pyplot.subplots(ncols=len(case_studies), figsize=(15, 3))
-        counter = 0
-        for ax, case_study in zip(axs, case_studies):
-            author_data = get_structural_feature_author_data_for_case_study(case_study)
-            author_data = author_data.sort_values(by=["num_implementing_authors"])
+            rows = []
+            for index in data.index:
+                feature = data.at[index, "feature"]
+                rows.extend(
+                    [
+                        [
+                            feature,
+                            data.at[index, "struct_authors"],
+                            "Structurally",
+                        ],
+                        [
+                            feature,
+                            data.at[index, "df_authors"],
+                            "Through Outside DF",
+                        ],
+                        [
+                            feature,
+                            data.at[index, "unique_df_authors"],
+                            "Only Through DF",
+                        ],
+                    ]
+                )
+            df = pd.DataFrame(
+                data=rows,
+                columns=["Feature", "Num Interacting Authors", "Interaction Type"],
+            )
             sns.barplot(
-                data=author_data,
-                x="feature",
-                y="num_implementing_authors",
-                color="tab:blue",
-                ax=ax,
+                data=df,
+                x="Feature",
+                y="Num Interacting Authors",
+                hue="Interaction Type",
+                ax=axs[0],
             )
-            if counter == 0:
-                ax.set_xlabel("Features")
-                ax.set_ylabel("Num Implementing Authors")
-            else:
-                ax.set_xlabel("")
-                ax.set_ylabel("")
-            x_rng = range(1, len(author_data) + 1, 2)
-            ax.set_xticks(ticks=x_rng, labels=[str(i) for i in x_rng])
-            max_impl_authors = max(author_data["num_implementing_authors"])
-            y_rng = range(1, max_impl_authors + 1)
-            ax.set_yticks(ticks=y_rng, labels=[str(i) for i in y_rng])
-            ax.set_title(case_study.project_name)
-            counter += 1
-
-
-class FeatureAuthorStructDisPlotGenerator(
-    PlotGenerator,
-    generator_name="feature-author-struct-dis-plot",
-    options=[REQUIRE_MULTI_CASE_STUDY],
-):
-    def generate(self) -> tp.List[Plot]:
-        case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
+            axs[0].set_xlabel("Features (sorted by size)" if row == 1 else "", size=13)
+            axs[0].set_ylabel("Num Interacting Authors", size=13)
+            axs[0].set_xticklabels(labels=data["feature"].values, rotation=22.5)
 
-        return [
-            FeatureAuthorStructDisPlot(
-                self.plot_config, case_studies=case_studies, **self.plot_kwargs
+            sns.regplot(
+                data=data,
+                x="feature_size",
+                y="struct_authors",
+                ci=None,
+                ax=axs[1],
+                label="Structural Interactions",
             )
-        ]
-
-
-class FeatureAuthorDataflowDisPlot(Plot, plot_name="feature_author_dataflow_dis_plot"):
-    def plot(self, view_mode: bool) -> None:
-        case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
-        projects_data = [
-            get_dataflow_feature_author_data_for_case_study(case_study).loc[
-                :, "interacting_authors_outside"
-            ]
-            for case_study
in case_studies - ] - data = get_stacked_author_data_for_case_studies(case_studies, projects_data) - - data = data.sort_values(by=["1 Author"]) - print(data) - data.set_index("Project").plot( - kind="bar", - stacked=True, - ylabel="Number of Features Affected Through Outside Dataflow by", - ) - - -class FeatureAuthorDataflowDisPlotGenerator( - PlotGenerator, - generator_name="feature-author-dataflow-dis-plot", - options=[REQUIRE_MULTI_CASE_STUDY], -): - def generate(self) -> tp.List[Plot]: - case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study") - - return [ - FeatureAuthorDataflowDisPlot( - self.plot_config, case_studies=case_studies, **self.plot_kwargs + sns.regplot( + data=data, + x="feature_size", + y="df_authors", + ci=None, + ax=axs[1], + label="(Outside) Dataflow Interactions", ) - ] - - -def get_combined_author_data_for_case_study(case_study: CaseStudy) -> pd.DataFrame: - structural_data = get_structural_feature_author_data_for_case_study(case_study) - structural_data = structural_data.sort_values(by=["num_implementing_authors"]) - dataflow_data = get_dataflow_feature_author_data_for_case_study(case_study) - - combined_rows = [] - for i in structural_data.index: - feature = structural_data.loc[i, "feature"] - num_implementing_authors = structural_data.loc[i, "num_implementing_authors"] - for _ in range(num_implementing_authors): - combined_rows.append( - [ - feature, - "Implementing Authors", # type - ] + axs[1].set_xlabel("Feature Size", size=13) + axs[1].set_ylabel("Num Interacting Authors", size=13) + axs[1].legend(ncol=1) + + corr, p_value = stats.pearsonr( + data["struct_authors"].values, + data["feature_size"].values, ) - for i in dataflow_data.index: - feature = dataflow_data.loc[i, "feature"] - interacting_authors_outside = dataflow_data.loc[ - i, "interacting_authors_outside" - ] - for _ in range(interacting_authors_outside): - combined_rows.append( - [ - feature, - "Interacting Authors Through Outside Dataflow", # type - ] + axs[1].text( + corr_x_pos[row-1], + corr_y_pos[row-1][0], + "corr=" + str(round(corr, 3)) + ", p-value=" + str(round(p_value, 3)), + color="tab:blue", ) - - columns = ["feature", "interaction_type"] - - return pd.DataFrame(combined_rows, columns=columns) - - -class FeatureCombinedAuthorPlot(Plot, plot_name="feature_combined_author_plot"): - def plot(self, view_mode: bool) -> None: - case_study: CaseStudy = self.plot_kwargs["case_study"] - data = get_combined_author_data_for_case_study(case_study) - print(data) - pyplot.figure(figsize=(13, 8)) - sns.histplot( - data=data, - x="feature", - hue="interaction_type", - multiple="dodge", - shrink=0.8, - ) - - -class FeatureCombinedAuthorPlotGenerator( - PlotGenerator, - generator_name="feature-combined-author-plot", - options=[REQUIRE_MULTI_CASE_STUDY], -): - def generate(self) -> tp.List[Plot]: - case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study") - return [ - FeatureCombinedAuthorPlot( - self.plot_config, case_study=case_study, **self.plot_kwargs + corr, p_value = stats.pearsonr( + data["df_authors"].values, + data["feature_size"].values, + ) + axs[1].text( + corr_x_pos[row-1], + corr_y_pos[row-1][1], + "corr=" + str(round(corr, 3)) + ", p-value=" + str(round(p_value, 3)), + color="tab:orange", ) - for case_study in case_studies - ] - -class FeatureSizeCorrAuthorPlot(Plot, plot_name="feature_size_corr_author_plot"): - def plot(self, view_mode: bool) -> None: - case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"] - data = pd.concat( - [ - 
get_structural_feature_author_data_for_case_study(case_study) - for case_study in case_studies - ] - ) - print(data) - ax = sns.regplot(data=data, x="feature_size", y="num_implementing_authors") - ax.set(xlabel="Feature Size", ylabel="Number Implementing Authors") + row += 1 -class FeatureSizeCorrAuthorPlotGenerator( +class AuthorCFIPlotGenerator( PlotGenerator, - generator_name="feature-size-corr-author-plot", + generator_name="author-cfi-plot", options=[REQUIRE_MULTI_CASE_STUDY], ): def generate(self) -> tp.List[Plot]: case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study") return [ - FeatureSizeCorrAuthorPlot( + AuthorCFIPlot( self.plot_config, case_studies=case_studies, **self.plot_kwargs ) ] diff --git a/varats/varats/tables/feature_blame_tables.py b/varats/varats/tables/feature_blame_tables.py index d908942c2..f3efdd365 100644 --- a/varats/varats/tables/feature_blame_tables.py +++ b/varats/varats/tables/feature_blame_tables.py @@ -12,8 +12,6 @@ get_commit_specific_dataflow_data_for_case_study, get_general_commit_dataflow_data_for_case_study, get_feature_dataflow_data_for_case_study, - get_structural_feature_author_data_for_case_study, - get_dataflow_feature_author_data_for_case_study, ) from varats.table.table import Table from varats.table.table_utils import dataframe_to_table
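Note for reviewers: both get_commits_dataflow_interacting_features and
get_features_dataflow_affecting_commits appear to split dataflow interactions by
whether the commit also interacts with the feature structurally ("inside"
dataflow) or not ("outside"). A minimal sketch of that split, with made-up
commit hashes and feature names (not taken from any case study):

    # structural interactions per commit, in the shape returned by
    # get_commits_structurally_interacting_features (toy values)
    structural = {"c1": {"compress"}, "c2": {"encrypt"}}

    # dataflow interactions as (commit, feature) pairs (toy values)
    dataflow = [("c1", "compress"), ("c1", "encrypt"), ("c3", "compress")]

    inside_df: dict = {}
    outside_df: dict = {}
    for commit, feature in dataflow:
        # "inside" dataflow: the commit also touches the feature structurally
        if feature in structural.get(commit, set()):
            inside_df.setdefault(commit, set()).add(feature)
        else:
            outside_df.setdefault(commit, set()).add(feature)

    print(inside_df)   # {'c1': {'compress'}}
    print(outside_df)  # {'c1': {'encrypt'}, 'c3': {'compress'}}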
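Note for reviewers: the aggregation in generate_feature_author_data is easier to
check against a tiny example. The sketch below reproduces its core idea outside
of VaRA-TS: per feature, collect the set of structurally interacting authors and
the set of outside-dataflow authors, then derive "unique_df_authors" as the set
difference. All feature and author names here are illustrative:

    import pandas as pd

    # toy stand-ins for the report contents: (feature, author) pairs taken
    # from structural CFIs and from outside-dataflow CFIs
    structural = [("compress", "alice"), ("compress", "bob"), ("encrypt", "alice")]
    outside_df = [("compress", "carol"), ("encrypt", "alice")]

    struct_authors: dict = {}
    df_authors: dict = {}
    for feature, author in structural:
        struct_authors.setdefault(feature, set()).add(author)
    for feature, author in outside_df:
        df_authors.setdefault(feature, set()).add(author)

    rows = [
        [
            feature,
            len(struct_authors[feature]),
            len(df_authors.get(feature, set())),
            # authors that reach the feature only through dataflow
            len(df_authors.get(feature, set()) - struct_authors[feature]),
        ]
        for feature in struct_authors
    ]
    print(
        pd.DataFrame(
            rows,
            columns=["feature", "struct_authors", "df_authors", "unique_df_authors"],
        )
    )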
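Note for reviewers: AuthorCFIPlot places its Pearson labels via the hand-tuned
corr_x_pos/corr_y_pos data coordinates, which must be re-tuned whenever the case
studies change. A possible alternative (a sketch on synthetic data, not part of
this patch) anchors the label in axes-fraction coordinates via
transform=ax.transAxes, so a single position works for every panel:

    import numpy as np
    import seaborn as sns
    from matplotlib import pyplot
    from scipy import stats

    rng = np.random.default_rng(0)
    feature_size = rng.integers(10, 1000, 30)  # stand-in for "feature_size"
    num_authors = 1 + feature_size // 250      # stand-in for "struct_authors"

    ax = sns.regplot(x=feature_size, y=num_authors, ci=None)
    corr, p_value = stats.pearsonr(feature_size, num_authors)
    # (0.05, 0.95) in axes coordinates is the top-left corner of any panel,
    # independent of the data ranges
    ax.text(
        0.05,
        0.95,
        "corr=" + str(round(corr, 3)) + ", p-value=" + str(round(p_value, 3)),
        transform=ax.transAxes,
        va="top",
        color="tab:blue",
    )
    pyplot.show()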