From de7509221a8f3d1e5c0b9554fbf2f5af710dc4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20R=C3=BCdiger=20Steuer?= Date: Thu, 5 Oct 2023 16:03:25 +0200 Subject: [PATCH] new author plot --- .../data/reports/feature_blame_report.py | 247 ++++++++------- varats/varats/plots/feature_blame_plots.py | 281 ++++++------------ varats/varats/tables/feature_blame_tables.py | 2 - 3 files changed, 204 insertions(+), 326 deletions(-) diff --git a/varats/varats/data/reports/feature_blame_report.py b/varats/varats/data/reports/feature_blame_report.py index 09903880e..66b037ee5 100644 --- a/varats/varats/data/reports/feature_blame_report.py +++ b/varats/varats/data/reports/feature_blame_report.py @@ -26,8 +26,7 @@ class StructuralCommitFeatureInteraction: occurs in.""" def __init__( - self, num_instructions: int, features: tp.List[str], - commit: CommitRepoPair + self, num_instructions: int, features: tp.List[str], commit: CommitRepoPair ) -> None: self.__num_instructions = num_instructions self.__features = features @@ -47,9 +46,7 @@ def create_commit_feature_interaction( (raw_inst_entry["commit-repo-pair"])["commit"], (raw_inst_entry["commit-repo-pair"])["repository"], ) - return StructuralCommitFeatureInteraction( - num_instructions, features, commit - ) + return StructuralCommitFeatureInteraction(num_instructions, features, commit) @property def num_instructions(self) -> int: @@ -71,9 +68,7 @@ class FeatureBlameReportMetaData(FeatureAnalysisReportMetaData): pass -class StructuralFeatureBlameReport( - BaseReport, shorthand="SFBR", file_type="yaml" -): +class StructuralFeatureBlameReport(BaseReport, shorthand="SFBR", file_type="yaml"): """Data class that gives access to a loaded structural feature blame report.""" @@ -87,17 +82,20 @@ def __init__(self, path: Path) -> None: version_header.raise_if_version_is_less_than(1) self.__meta_data = ( - FeatureBlameReportMetaData. - create_feature_analysis_report_meta_data(next(documents)) + FeatureBlameReportMetaData.create_feature_analysis_report_meta_data( + next(documents) + ) ) raw_feature_blame_report = next(documents) self.__commit_feature_interactions = [ - StructuralCommitFeatureInteraction. 
-                create_commit_feature_interaction(cfi)
+                StructuralCommitFeatureInteraction.create_commit_feature_interaction(
+                    cfi
+                )
                 for cfi in raw_feature_blame_report[
-                    "structural-commit-feature-interactions"]
+                    "structural-commit-feature-interactions"
+                ]
             ]
 
     @property
@@ -114,14 +112,13 @@ def commit_feature_interactions(
         return self.__commit_feature_interactions
 
 
-def generate_feature_scfi_data(
-    SFBR: StructuralFeatureBlameReport
-) -> pd.DataFrame:
+def generate_feature_scfi_data(SFBR: StructuralFeatureBlameReport) -> pd.DataFrame:
     # {ftr:
     # [[inter_commits, inter_commits_nd1, inter_commits_nd>1], [def_ftr_size, pot_ftr_size]]}
-    features_cfi_data: tp.Dict[str, tp.List[tp.List[tp.Set[str], tp.Set[str],
-                                                    tp.Set[str]],
-                                            tp.List[int, int]],] = {}
+    # tp.List takes exactly one type parameter, so the two differently shaped
+    # inner lists documented above are annotated as tp.List[tp.Any]
+    features_cfi_data: tp.Dict[str, tp.List[tp.List[tp.Any]]] = {}
     for SCFI in SFBR.commit_feature_interactions:
         commit_hash = ShortCommitHash(SCFI.commit.commit_hash).hash
         nesting_degree: int = len(SCFI.features)
@@ -138,14 +135,17 @@ def generate_feature_scfi_data(
         elif entry[0][1].isdisjoint([commit_hash]):
             entry[0][2].add(commit_hash)
         features_cfi_data.update({feature: entry})
-    rows = [[
-        feature_data[0],
-        len(feature_data[1][0][0]),
-        len(feature_data[1][0][1]),
-        len(feature_data[1][0][2]),
-        feature_data[1][1][0],
-        feature_data[1][1][1],
-    ] for feature_data in features_cfi_data.items()]
+    rows = [
+        [
+            feature_data[0],
+            len(feature_data[1][0][0]),
+            len(feature_data[1][0][1]),
+            len(feature_data[1][0][2]),
+            feature_data[1][1][0],
+            feature_data[1][1][1],
+        ]
+        for feature_data in features_cfi_data.items()
+    ]
     return pd.DataFrame(
         rows,
         columns=[
@@ -159,37 +159,6 @@ def generate_feature_scfi_data(
     )
 
 
-def generate_feature_author_scfi_data(
-    SFBR: StructuralFeatureBlameReport, project_gits: tp.Dict[str,
-                                                              pygit2.Repository]
-) -> pd.DataFrame:
-    # {feature: (authors, size)}
-    features_cfi_author_data: tp.Dict[str, tp.Tuple(tp.Set[str], int)] = {}
-    for SCFI in SFBR.commit_feature_interactions:
-        commit_hash = ShortCommitHash(SCFI.commit.commit_hash).hash
-        repo = SCFI.commit.repository_name
-        author = get_author(commit_hash, project_gits.get(repo))
-        if author is None:
-            continue
-        for feature in SCFI.features:
-            entry = features_cfi_author_data.get(feature)
-            if not entry:
-                features_cfi_author_data.update({
-                    feature: (set([author]), SCFI.num_instructions)
-                })
-            else:
-                entry[0].add(author)
-                features_cfi_author_data.update({
-                    feature: (entry[0], entry[1] + SCFI.num_instructions)
-                })
-    rows = [[feature_data[0],
-             len(feature_data[1][0]), feature_data[1][1]]
-            for feature_data in features_cfi_author_data.items()]
-    return pd.DataFrame(
-        rows, columns=["feature", "num_implementing_authors", "feature_size"]
-    )
-
-
 def generate_commit_scfi_data(
     SFBR: StructuralFeatureBlameReport, code_churn_lookup
 ) -> pd.DataFrame:
@@ -278,9 +247,7 @@ def commits(self) -> tp.List[CommitRepoPair]:
         return self.__commits
 
 
-class DataflowFeatureBlameReport(
-    BaseReport, shorthand="DFBR", file_type="yaml"
-):
+class DataflowFeatureBlameReport(BaseReport, shorthand="DFBR", file_type="yaml"):
     """Data class that gives access to a loaded dataflow feature blame
     report."""
 
@@ -294,16 +261,18 @@ def __init__(self, path: Path) -> None:
             version_header.raise_if_version_is_less_than(1)
 
             self.__meta_data = (
-                FeatureBlameReportMetaData.
- create_feature_analysis_report_meta_data(next(documents)) + FeatureBlameReportMetaData.create_feature_analysis_report_meta_data( + next(documents) + ) ) raw_feature_blame_report = next(documents) self.__commit_feature_interactions = [ - DataflowCommitFeatureInteraction. - create_commit_feature_interaction(cfi) for cfi in - raw_feature_blame_report["dataflow-commit-feature-interactions"] + DataflowCommitFeatureInteraction.create_commit_feature_interaction(cfi) + for cfi in raw_feature_blame_report[ + "dataflow-commit-feature-interactions" + ] ] @property @@ -313,9 +282,7 @@ def meta_data(self) -> FeatureAnalysisReportMetaData: return self.__meta_data @property - def commit_feature_interactions( - self - ) -> tp.List[DataflowCommitFeatureInteraction]: + def commit_feature_interactions(self) -> tp.List[DataflowCommitFeatureInteraction]: """Return all dataflow-based cfis.""" return self.__commit_feature_interactions @@ -340,10 +307,10 @@ def get_commits_dataflow_interacting_features( DFBR: DataflowFeatureBlameReport, ) -> tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], tp.Set[str]]]: # [hash, ([all_interacting_features], [inside_df], [outside_df])] - dfi_commit: tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], - tp.Set[str]]] = {} + dfi_commit: tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], tp.Set[str]]] = {} commits_structurally_interacting_features: tp.Dict[ - str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR) + str, tp.Set[str] + ] = get_commits_structurally_interacting_features(SFBR) for DCFI in DFBR.commit_feature_interactions: feature = DCFI.feature @@ -371,11 +338,13 @@ def get_features_dataflow_affecting_commits( SFBR: StructuralFeatureBlameReport, DFBR: DataflowFeatureBlameReport ) -> tp.Dict[str, tp.Tuple[tp.Set[CommitRepoPair], tp.Set[CommitRepoPair]]]: # {feature, ([interacting_commits_outside], [interacting_commits_inside])} - dci_feature: tp.Dict[str, tp.Tuple[tp.Set[CommitRepoPair], - tp.Set[CommitRepoPair]]] = {} + dci_feature: tp.Dict[ + str, tp.Tuple[tp.Set[CommitRepoPair], tp.Set[CommitRepoPair]] + ] = {} commits_structurally_interacting_with_features: tp.Dict[ - str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR) + str, tp.Set[str] + ] = get_commits_structurally_interacting_features(SFBR) for DCFI in DFBR.commit_feature_interactions: feature = DCFI.feature @@ -409,12 +378,15 @@ def generate_commit_specific_dcfi_data( # [hash, ([all_interacting_features], [inside_df], [outside_df])] dfi_commit = get_commits_dataflow_interacting_features(SFBR, DFBR) - rows_commit_dfi = [[ - commit_data[0], - len(commit_data[1][0]), - len(commit_data[1][1]), - len(commit_data[1][2]), - ] for commit_data in dfi_commit.items()] + rows_commit_dfi = [ + [ + commit_data[0], + len(commit_data[1][0]), + len(commit_data[1][1]), + len(commit_data[1][2]), + ] + for commit_data in dfi_commit.items() + ] counter = 0 for _ in range(0, num_commits - len(dfi_commit)): rows_commit_dfi.append([f"fake_hash{counter}", 0, 0, 0]) @@ -436,7 +408,8 @@ def generate_general_commit_dcfi_data( ) -> pd.DataFrame: row = [] commits_structurally_interacting_features: tp.Dict[ - str, tp.Set[str]] = get_commits_structurally_interacting_features(SFBR) + str, tp.Set[str] + ] = get_commits_structurally_interacting_features(SFBR) num_structurally_interacting_commits = len( commits_structurally_interacting_features.values() ) @@ -448,8 +421,7 @@ def generate_general_commit_dcfi_data( interacting_structurally_and_through_dataflow = 0 num_structural_interactions = 0 # check for every 
structural CFI whether its respective commit and feature also interact through dataflow
-    for commit_hash, features in commits_structurally_interacting_features.items(
-    ):
+    for commit_hash, features in commits_structurally_interacting_features.items():
         commit_hash: str = ShortCommitHash(commit_hash).hash
         entry = commits_dataflow_interacting_features.get(commit_hash)
         num_structural_interactions += len(features)
@@ -458,15 +430,8 @@ def generate_general_commit_dcfi_data(
                 interacting_structurally_and_through_dataflow += 1
 
     row.append(
-        interacting_structurally_and_through_dataflow /
-        num_structural_interactions
-    )
-    print("likelihood_dataflow_interaction_when_interacting_structurally")
-    print(
-        interacting_structurally_and_through_dataflow /
-        num_structural_interactions
+        interacting_structurally_and_through_dataflow / num_structural_interactions
     )
-    print("")
 
     columns = [
         "fraction_commits_structurally_interacting_with_features",
@@ -483,13 +448,17 @@ def generate_feature_dcfi_data(
 
     feature_scfi_data = generate_feature_scfi_data(SFBR)
 
-    rows_feature_dci = [[
-        feature_data[0],
-        feature_scfi_data.loc[feature_scfi_data["feature"] == feature_data[0]]
-        ["pot_feature_size"].to_numpy()[0],
-        len(feature_data[1][0]),
-        len(feature_data[1][1]),
-    ] for feature_data in dci_feature.items()]
+    rows_feature_dci = [
+        [
+            feature_data[0],
+            feature_scfi_data.loc[feature_scfi_data["feature"] == feature_data[0]][
+                "pot_feature_size"
+            ].to_numpy()[0],
+            len(feature_data[1][0]),
+            len(feature_data[1][1]),
+        ]
+        for feature_data in dci_feature.items()
+    ]
 
     columns = [
         "feature",
@@ -500,51 +469,67 @@ def generate_feature_dcfi_data(
     return pd.DataFrame(rows_feature_dci, columns=columns)
 
 
-def generate_feature_author_dcfi_data(
+def generate_feature_author_data(
     SFBR: StructuralFeatureBlameReport,
     DFBR: DataflowFeatureBlameReport,
     project_gits: tp.Dict[str, pygit2.Repository],
 ) -> pd.DataFrame:
-    dci_feature = get_features_dataflow_affecting_commits(SFBR, DFBR)
-    # {feature, ([interacting_authors_outside], [interacting_authors_inside])}
-    rows_feature_author_dci = []
+    # authors that interact with features through inside dataflow also
+    # interact with them structurally by definition
+    # {feature: (struct_authors, outside_df_authors, size)}
+    feature_author_data: tp.Dict[str, tp.Tuple[tp.Set[str], tp.Set[str], int]] = {}
+    for SCFI in SFBR.commit_feature_interactions:
+        commit_hash = SCFI.commit.commit_hash
+        repo = SCFI.commit.repository_name
+        author = get_author(commit_hash, project_gits.get(repo))
+        if author is None:
+            continue
+        for feature in SCFI.features:
+            entry = feature_author_data.get(feature)
+            if not entry:
+                feature_author_data.update(
+                    {feature: (set([author]), set(), SCFI.num_instructions)}
+                )
+            else:
+                entry[0].add(author)
+                feature_author_data.update(
+                    {feature: (entry[0], entry[1], entry[2] + SCFI.num_instructions)}
+                )
 
-    feature_scfi_data = generate_feature_scfi_data(SFBR)
+    dci_feature = get_features_dataflow_affecting_commits(SFBR, DFBR)
 
     for feature_data in dci_feature.items():
         feature = feature_data[0]
+        entry = feature_author_data.get(feature)
+        if not entry:
+            continue
         interacting_commits_outside = feature_data[1][0]
-        interacting_authors_outside: tp.Set[str] = set([])
        for commit in interacting_commits_outside:
             commit_hash = commit.commit_hash
             repo = commit.repository_name
             author = get_author(commit_hash, project_gits.get(repo))
             if author is None:
                 continue
-            interacting_authors_outside.add(author)
-
-        interacting_commits_inside = feature_data[1][1]
-        interacting_authors_inside:
tp.Set[str] = set([]) - for commit in interacting_commits_inside: - commit_hash = commit.commit_hash - repo = commit.repository_name - author = get_author(commit_hash, project_gits.get(repo)) - if author is None: - continue - interacting_authors_inside.add(author) - - rows_feature_author_dci.append([ - feature, - feature_scfi_data.loc[feature_scfi_data["feature"] == feature] - ["feature_size"].to_numpy()[0], - len(interacting_authors_outside), - len(interacting_authors_inside), - ]) - - columns = [ - "feature", - "feature_size", - "interacting_authors_outside", - "interacting_authors_inside", + entry[1].add(author) + feature_author_data.update({feature: (entry[0], entry[1], entry[2])}) + + rows = [ + [ + feature_data[0], + len(feature_data[1][0]), + len(feature_data[1][1]), + len(feature_data[1][1].difference(feature_data[1][0])), + feature_data[1][2], + ] + for feature_data in feature_author_data.items() ] - return pd.DataFrame(rows_feature_author_dci, columns=columns) + return pd.DataFrame( + data=rows, + columns=[ + "feature", + "struct_authors", + "df_authors", + "unique_df_authors", + "feature_size", + ], + ) diff --git a/varats/varats/plots/feature_blame_plots.py b/varats/varats/plots/feature_blame_plots.py index ef87832d0..9fd010d78 100644 --- a/varats/varats/plots/feature_blame_plots.py +++ b/varats/varats/plots/feature_blame_plots.py @@ -20,8 +20,7 @@ generate_commit_specific_dcfi_data, generate_general_commit_dcfi_data, generate_feature_dcfi_data, - generate_feature_author_scfi_data, - generate_feature_author_dcfi_data, + generate_feature_author_data, ) from varats.jupyterhelper.file import ( load_structural_feature_blame_report, @@ -703,18 +702,7 @@ def generate(self) -> tp.List[Plot]: ########## AUTHORS ########### -def get_structural_feature_author_data_for_case_study( - case_study: CaseStudy, -) -> pd.DataFrame: - report_file = get_structural_report_files_for_project(case_study.project_name)[0] - project_gits = get_local_project_gits(case_study.project_name) - report = load_structural_feature_blame_report(report_file) - data_frame: pd.DataFrame = generate_feature_author_scfi_data(report, project_gits) - - return data_frame - - -def get_dataflow_feature_author_data_for_case_study( +def get_feature_author_data_for_case_study( case_study: CaseStudy, ) -> pd.DataFrame: structural_report_file = get_structural_report_files_for_project( @@ -726,211 +714,118 @@ def get_dataflow_feature_author_data_for_case_study( project_gits = get_local_project_gits(case_study.project_name) structural_report = load_structural_feature_blame_report(structural_report_file) dataflow_report = load_dataflow_feature_blame_report(dataflow_report_file) - data_frame: pd.DataFrame = generate_feature_author_dcfi_data( + data_frame: pd.DataFrame = generate_feature_author_data( structural_report, dataflow_report, project_gits ) return data_frame -def get_stacked_author_data_for_case_studies( - case_studies: tp.List[CaseStudy], - projects_data, -) -> pd.DataFrame: - rows = [] - - max_num_interacting_authors = max( - [max(project_data) for project_data in projects_data] - ) - - for case_study, project_data in zip(case_studies, projects_data): - count: [int] = [0 for _ in range(0, max_num_interacting_authors)] - for num_interacting_authors in project_data: - count[num_interacting_authors - 1] = count[num_interacting_authors - 1] + 1 - - rows.append([case_study.project_name] + count) - - author_columns, adj_rows = ( - [], - [[case_study.project_name] for case_study in case_studies], - ) - for i in range(1, 
max_num_interacting_authors + 1):
-        s = np.sum([int(rows[j][i]) for j in range(0, len(case_studies))])
-        if s > 0:
-            author_columns.append(str(i) + " Author" + ("s" if i > 1 else ""))
-            for j in range(0, len(case_studies)):
-                adj_rows[j].append(rows[j][i])
-    return pd.DataFrame(adj_rows, columns=["Project"] + author_columns)
-
-
-class FeatureAuthorStructDisPlot(Plot, plot_name="feature_author_struct_dis_plot"):
+class AuthorCFIPlot(Plot, plot_name="author_cfi_plot"):
     def plot(self, view_mode: bool) -> None:
         case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
+        fig, naxs = pyplot.subplots(nrows=len(case_studies), ncols=2, figsize=(15, 15))
+        grid = pyplot.GridSpec(len(case_studies), 2)
+        fig.tight_layout(pad=6.5)
+        row: int = 1
+        # hand-tuned data coordinates for the per-panel correlation labels,
+        # one entry per case study
+        corr_x_pos = [0, 600, 20, 15]
+        corr_y_pos = [(1.9, 1.83), (1.8, 1.2), (2.5, 2.35), (5.6, 5.2)]
+        for axs, case_study in zip(naxs, case_studies):
+            create_subtitle(fig, grid[row - 1, ::], case_study.project_name)
+            data = get_feature_author_data_for_case_study(case_study)
+            data = data.sort_values(by=["feature_size"])
 
-        fig, axs = pyplot.subplots(ncols=len(case_studies), figsize=(15, 3))
-        counter = 0
-        for ax, case_study in zip(axs, case_studies):
-            author_data = get_structural_feature_author_data_for_case_study(case_study)
-            author_data = author_data.sort_values(by=["num_implementing_authors"])
+            rows = []
+            for index in data.index:
+                feature = data.at[index, "feature"]
+                rows.extend(
+                    [
+                        [
+                            feature,
+                            data.at[index, "struct_authors"],
+                            "Structurally",
+                        ],
+                        [
+                            feature,
+                            data.at[index, "df_authors"],
+                            "Through Outside DF",
+                        ],
+                        [
+                            feature,
+                            data.at[index, "unique_df_authors"],
+                            "Only Through DF",
+                        ],
+                    ]
+                )
+            df = pd.DataFrame(
+                data=rows,
+                columns=["Feature", "Num Interacting Authors", "Interaction Type"],
+            )
             sns.barplot(
-                data=author_data,
-                x="feature",
-                y="num_implementing_authors",
-                color="tab:blue",
-                ax=ax,
+                data=df,
+                x="Feature",
+                y="Num Interacting Authors",
+                hue="Interaction Type",
+                ax=axs[0],
             )
-            if counter == 0:
-                ax.set_xlabel("Features")
-                ax.set_ylabel("Num Implementing Authors")
-            else:
-                ax.set_xlabel("")
-                ax.set_ylabel("")
-            x_rng = range(1, len(author_data) + 1, 2)
-            ax.set_xticks(ticks=x_rng, labels=[str(i) for i in x_rng])
-            max_impl_authors = max(author_data["num_implementing_authors"])
-            y_rng = range(1, max_impl_authors + 1)
-            ax.set_yticks(ticks=y_rng, labels=[str(i) for i in y_rng])
-            ax.set_title(case_study.project_name)
-            counter += 1
-
-
-class FeatureAuthorStructDisPlotGenerator(
-    PlotGenerator,
-    generator_name="feature-author-struct-dis-plot",
-    options=[REQUIRE_MULTI_CASE_STUDY],
-):
-    def generate(self) -> tp.List[Plot]:
-        case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study")
+            axs[0].set_xlabel("Features (sorted by size)" if row == 1 else "", size=13)
+            axs[0].set_ylabel("Num Interacting Authors", size=13)
+            axs[0].set_xticklabels(labels=data["feature"].values, rotation=22.5)
 
-        return [
-            FeatureAuthorStructDisPlot(
-                self.plot_config, case_studies=case_studies, **self.plot_kwargs
+            sns.regplot(
+                data=data,
+                x="feature_size",
+                y="struct_authors",
+                ci=None,
+                ax=axs[1],
+                label="Structural Interactions",
             )
-        ]
-
-
-class FeatureAuthorDataflowDisPlot(Plot, plot_name="feature_author_dataflow_dis_plot"):
-    def plot(self, view_mode: bool) -> None:
-        case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"]
-        projects_data = [
-            get_dataflow_feature_author_data_for_case_study(case_study).loc[
-                :, "interacting_authors_outside"
-            ]
-            for case_study
in case_studies - ] - data = get_stacked_author_data_for_case_studies(case_studies, projects_data) - - data = data.sort_values(by=["1 Author"]) - print(data) - data.set_index("Project").plot( - kind="bar", - stacked=True, - ylabel="Number of Features Affected Through Outside Dataflow by", - ) - - -class FeatureAuthorDataflowDisPlotGenerator( - PlotGenerator, - generator_name="feature-author-dataflow-dis-plot", - options=[REQUIRE_MULTI_CASE_STUDY], -): - def generate(self) -> tp.List[Plot]: - case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study") - - return [ - FeatureAuthorDataflowDisPlot( - self.plot_config, case_studies=case_studies, **self.plot_kwargs + sns.regplot( + data=data, + x="feature_size", + y="df_authors", + ci=None, + ax=axs[1], + label="(Outside) Dataflow Interactions", ) - ] - - -def get_combined_author_data_for_case_study(case_study: CaseStudy) -> pd.DataFrame: - structural_data = get_structural_feature_author_data_for_case_study(case_study) - structural_data = structural_data.sort_values(by=["num_implementing_authors"]) - dataflow_data = get_dataflow_feature_author_data_for_case_study(case_study) - - combined_rows = [] - for i in structural_data.index: - feature = structural_data.loc[i, "feature"] - num_implementing_authors = structural_data.loc[i, "num_implementing_authors"] - for _ in range(num_implementing_authors): - combined_rows.append( - [ - feature, - "Implementing Authors", # type - ] + axs[1].set_xlabel("Feature Size", size=13) + axs[1].set_ylabel("Num Interacting Authors", size=13) + axs[1].legend(ncol=1) + + corr, p_value = stats.pearsonr( + data["struct_authors"].values, + data["feature_size"].values, ) - for i in dataflow_data.index: - feature = dataflow_data.loc[i, "feature"] - interacting_authors_outside = dataflow_data.loc[ - i, "interacting_authors_outside" - ] - for _ in range(interacting_authors_outside): - combined_rows.append( - [ - feature, - "Interacting Authors Through Outside Dataflow", # type - ] + axs[1].text( + corr_x_pos[row-1], + corr_y_pos[row-1][0], + "corr=" + str(round(corr, 3)) + ", p-value=" + str(round(p_value, 3)), + color="tab:blue", ) - - columns = ["feature", "interaction_type"] - - return pd.DataFrame(combined_rows, columns=columns) - - -class FeatureCombinedAuthorPlot(Plot, plot_name="feature_combined_author_plot"): - def plot(self, view_mode: bool) -> None: - case_study: CaseStudy = self.plot_kwargs["case_study"] - data = get_combined_author_data_for_case_study(case_study) - print(data) - pyplot.figure(figsize=(13, 8)) - sns.histplot( - data=data, - x="feature", - hue="interaction_type", - multiple="dodge", - shrink=0.8, - ) - - -class FeatureCombinedAuthorPlotGenerator( - PlotGenerator, - generator_name="feature-combined-author-plot", - options=[REQUIRE_MULTI_CASE_STUDY], -): - def generate(self) -> tp.List[Plot]: - case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study") - return [ - FeatureCombinedAuthorPlot( - self.plot_config, case_study=case_study, **self.plot_kwargs + corr, p_value = stats.pearsonr( + data["df_authors"].values, + data["feature_size"].values, + ) + axs[1].text( + corr_x_pos[row-1], + corr_y_pos[row-1][1], + "corr=" + str(round(corr, 3)) + ", p-value=" + str(round(p_value, 3)), + color="tab:orange", ) - for case_study in case_studies - ] - -class FeatureSizeCorrAuthorPlot(Plot, plot_name="feature_size_corr_author_plot"): - def plot(self, view_mode: bool) -> None: - case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_studies"] - data = pd.concat( - [ - 
get_structural_feature_author_data_for_case_study(case_study) - for case_study in case_studies - ] - ) - print(data) - ax = sns.regplot(data=data, x="feature_size", y="num_implementing_authors") - ax.set(xlabel="Feature Size", ylabel="Number Implementing Authors") + row += 1 -class FeatureSizeCorrAuthorPlotGenerator( +class AuthorCFIPlotGenerator( PlotGenerator, - generator_name="feature-size-corr-author-plot", + generator_name="author-cfi-plot", options=[REQUIRE_MULTI_CASE_STUDY], ): def generate(self) -> tp.List[Plot]: case_studies: tp.List[CaseStudy] = self.plot_kwargs.pop("case_study") return [ - FeatureSizeCorrAuthorPlot( + AuthorCFIPlot( self.plot_config, case_studies=case_studies, **self.plot_kwargs ) ] diff --git a/varats/varats/tables/feature_blame_tables.py b/varats/varats/tables/feature_blame_tables.py index d908942c2..f3efdd365 100644 --- a/varats/varats/tables/feature_blame_tables.py +++ b/varats/varats/tables/feature_blame_tables.py @@ -12,8 +12,6 @@ get_commit_specific_dataflow_data_for_case_study, get_general_commit_dataflow_data_for_case_study, get_feature_dataflow_data_for_case_study, - get_structural_feature_author_data_for_case_study, - get_dataflow_feature_author_data_for_case_study, ) from varats.table.table import Table from varats.table.table_utils import dataframe_to_table
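Note for reviewers: both get_commits_dataflow_interacting_features and
get_features_dataflow_affecting_commits appear to split dataflow interactions by
whether the commit also interacts with the feature structurally ("inside"
dataflow) or not ("outside"). A minimal sketch of that split, with made-up
commit hashes and feature names (not taken from any case study):

    # structural interactions per commit, in the shape returned by
    # get_commits_structurally_interacting_features (toy values)
    structural = {"c1": {"compress"}, "c2": {"encrypt"}}

    # dataflow interactions as (commit, feature) pairs (toy values)
    dataflow = [("c1", "compress"), ("c1", "encrypt"), ("c3", "compress")]

    inside_df: dict = {}
    outside_df: dict = {}
    for commit, feature in dataflow:
        # "inside" dataflow: the commit also touches the feature structurally
        if feature in structural.get(commit, set()):
            inside_df.setdefault(commit, set()).add(feature)
        else:
            outside_df.setdefault(commit, set()).add(feature)

    print(inside_df)   # {'c1': {'compress'}}
    print(outside_df)  # {'c1': {'encrypt'}, 'c3': {'compress'}}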
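Note for reviewers: the aggregation in generate_feature_author_data is easier to
check against a tiny example. The sketch below reproduces its core idea outside
of VaRA-TS: per feature, collect the set of structurally interacting authors and
the set of outside-dataflow authors, then derive "unique_df_authors" as the set
difference. All feature and author names here are illustrative:

    import pandas as pd

    # toy stand-ins for the report contents: (feature, author) pairs taken
    # from structural CFIs and from outside-dataflow CFIs
    structural = [("compress", "alice"), ("compress", "bob"), ("encrypt", "alice")]
    outside_df = [("compress", "carol"), ("encrypt", "alice")]

    struct_authors: dict = {}
    df_authors: dict = {}
    for feature, author in structural:
        struct_authors.setdefault(feature, set()).add(author)
    for feature, author in outside_df:
        df_authors.setdefault(feature, set()).add(author)

    rows = [
        [
            feature,
            len(struct_authors[feature]),
            len(df_authors.get(feature, set())),
            # authors that reach the feature only through dataflow
            len(df_authors.get(feature, set()) - struct_authors[feature]),
        ]
        for feature in struct_authors
    ]
    print(
        pd.DataFrame(
            rows,
            columns=["feature", "struct_authors", "df_authors", "unique_df_authors"],
        )
    )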
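Note for reviewers: AuthorCFIPlot places its Pearson labels via the hand-tuned
corr_x_pos/corr_y_pos data coordinates, which must be re-tuned whenever the case
studies change. A possible alternative (a sketch on synthetic data, not part of
this patch) anchors the label in axes-fraction coordinates via
transform=ax.transAxes, so a single position works for every panel:

    import numpy as np
    import seaborn as sns
    from matplotlib import pyplot
    from scipy import stats

    rng = np.random.default_rng(0)
    feature_size = rng.integers(10, 1000, 30)  # stand-in for "feature_size"
    num_authors = 1 + feature_size // 250      # stand-in for "struct_authors"

    ax = sns.regplot(x=feature_size, y=num_authors, ci=None)
    corr, p_value = stats.pearsonr(feature_size, num_authors)
    # (0.05, 0.95) in axes coordinates is the top-left corner of any panel,
    # independent of the data ranges
    ax.text(
        0.05,
        0.95,
        "corr=" + str(round(corr, 3)) + ", p-value=" + str(round(p_value, 3)),
        transform=ax.transAxes,
        va="top",
        color="tab:blue",
    )
    pyplot.show()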