Skip to content

Commit

Permalink
Flag typing bug issue
Browse files Browse the repository at this point in the history
  • Loading branch information
antsh3k committed Mar 5, 2024
1 parent 9f849a3 commit 1d50c06
Showing 1 changed file with 59 additions and 62 deletions.
121 changes: 59 additions & 62 deletions medcat/evaluate_mct_export/mct_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,67 +321,7 @@ def _eval(self, metacat_model, mct_export) -> dict:
result = self._eval_model(metacat_model.model, data, config=metacat_model.config, tokenizer=metacat_model.tokenizer)

return {'predictions': result, 'meta_values': _}

def generate_report(self, path: str = 'mct_report.xlsx', meta_ann=False, concept_filter: Optional[List] = None):
"""
:param path: Outfile path
:param meta_ann: Include Meta_annotation evaluation in the summary as well
:param concept_filter: Filter the report to only display select concepts of interest. List of cuis.
:return: A full excel report for MedCATtrainer annotation work done.
"""
if not self.cat:
raise ValueError("No model pack specified")
if concept_filter:
with pd.ExcelWriter(path) as writer:
print('Generating report...')
# array-like is allowed by documentation but not by typing
df = pd.DataFrame.from_dict([self.cat.get_model_card(as_dict=True)]).T.reset_index(drop=False) # type: ignore
df.columns = ['MCT report', f'Generated on {date.today().strftime("%Y/%m/%d")}'] # type: ignore
df = pd.concat([df, pd.DataFrame([['MCT Custom filter', concept_filter]], columns=df.columns)],
ignore_index = True)
df.to_excel(writer, index=False, sheet_name='medcat_model_card')
self.user_stats().to_excel(writer, index=False, sheet_name='user_stats')
print('Evaluating annotations...')
if meta_ann:
ann_df = self.full_annotation_df()
ann_df = ann_df[ann_df['cui'].isin(concept_filter)].reset_index(drop=True)
ann_df['timestamp'] = ann_df['timestamp'].dt.tz_localize(None) # Remove timezone information
ann_df.to_excel(writer, index=False, sheet_name='annotations')
else:
ann_df = self.annotation_df()
ann_df = ann_df[ann_df['cui'].isin(concept_filter)].reset_index(drop=True)
ann_df['timestamp'] = ann_df['timestamp'].dt.tz_localize(None) # Remove timezone information
ann_df.to_excel(writer, index=False, sheet_name='annotations')
performance_summary_df = self.concept_summary()
performance_summary_df = performance_summary_df[performance_summary_df['cui'].isin(concept_filter)]\
.reset_index(drop=True)
performance_summary_df.to_excel(writer, index=False, sheet_name='concept_summary')
if meta_ann:
print('Evaluating meta_annotations...')
meta_anns_df = self.meta_anns_concept_summary()
meta_anns_df = meta_anns_df[meta_anns_df['cui'].isin(concept_filter)].reset_index(drop=True)
meta_anns_df.to_excel(writer, index=True, sheet_name='meta_annotations_summary')
else:
with pd.ExcelWriter(path) as writer:
print('Generating report...')
df = pd.DataFrame.from_dict([self.cat.get_model_card(as_dict=True)]).T.reset_index(drop=False) # type: ignore
df.columns = ['MCT report', f'Generated on {date.today().strftime("%Y/%m/%d")}'] # type: ignore
df.to_excel(writer, index=False, sheet_name='medcat_model_card')
self.user_stats().to_excel(writer, index=False, sheet_name='user_stats')
print('Evaluating annotations...')
if meta_ann:
self.full_annotation_df().to_excel(writer, index=False, sheet_name='annotations')
else:
self.annotation_df().to_excel(writer, index=False, sheet_name='annotations')
self.concept_summary().to_excel(writer, index=False, sheet_name='concept_summary')
if meta_ann:
print('Evaluating meta_annotations...')
self.meta_anns_concept_summary().to_excel(writer, index=True, sheet_name='meta_annotations_summary')

return print(f"MCT report saved to: {path}")


''' TODO: clean uo the insert method with the meta_annotations
def full_annotation_df(self) -> pd.DataFrame:
"""
DataFrame of all annotations created including meta_annotation predictions.
Expand Down Expand Up @@ -410,7 +350,7 @@ def full_annotation_df(self) -> pd.DataFrame:
else:
pred_meta_values.append(_meta_values.get(meta_results['predictions'][counter], np.nan))
counter += 1
meta_df.insert(int(meta_df.columns.get_loc(meta_model)) + 1, f'predict_{meta_model}', pred_meta_values)
meta_df.insert(int(meta_df.columns.get_loc(meta_model)) + 1, f'predict_{meta_model}', pred_meta_values) # TODO fix this line

return meta_df

Expand Down Expand Up @@ -465,4 +405,61 @@ def meta_anns_concept_summary(self) -> pd.DataFrame:
meta_anns_df = meta_anns_df.rename_axis('cui').reset_index(drop=False)
meta_anns_df.insert(1, 'concept_name', meta_anns_df['cui'].map(self.cat.cdb.cui2preferred_name))
return meta_anns_df
'''

def generate_report(self, path: str = 'mct_report.xlsx', meta_ann=False, concept_filter: Optional[List] = None):
"""
:param path: Outfile path
:param meta_ann: Include Meta_annotation evaluation in the summary as well
:param concept_filter: Filter the report to only display select concepts of interest. List of cuis.
:return: A full excel report for MedCATtrainer annotation work done.
"""
if not self.cat:
raise ValueError("No model pack specified")
if concept_filter:
with pd.ExcelWriter(path) as writer:
print('Generating report...')
# array-like is allowed by documentation but not by typing
df = pd.DataFrame.from_dict([self.cat.get_model_card(as_dict=True)]).T.reset_index(drop=False) # type: ignore
df.columns = ['MCT report', f'Generated on {date.today().strftime("%Y/%m/%d")}'] # type: ignore
df = pd.concat([df, pd.DataFrame([['MCT Custom filter', concept_filter]], columns=df.columns)],
ignore_index = True)
df.to_excel(writer, index=False, sheet_name='medcat_model_card')
self.user_stats().to_excel(writer, index=False, sheet_name='user_stats')
print('Evaluating annotations...')
if meta_ann:
ann_df = self.full_annotation_df()
ann_df = ann_df[ann_df['cui'].isin(concept_filter)].reset_index(drop=True)
ann_df['timestamp'] = ann_df['timestamp'].dt.tz_localize(None) # Remove timezone information
ann_df.to_excel(writer, index=False, sheet_name='annotations')
else:
ann_df = self.annotation_df()
ann_df = ann_df[ann_df['cui'].isin(concept_filter)].reset_index(drop=True)
ann_df['timestamp'] = ann_df['timestamp'].dt.tz_localize(None) # Remove timezone information
ann_df.to_excel(writer, index=False, sheet_name='annotations')
performance_summary_df = self.concept_summary()
performance_summary_df = performance_summary_df[performance_summary_df['cui'].isin(concept_filter)]\
.reset_index(drop=True)
performance_summary_df.to_excel(writer, index=False, sheet_name='concept_summary')
if meta_ann:
print('Evaluating meta_annotations...')
meta_anns_df = self.meta_anns_concept_summary()
meta_anns_df = meta_anns_df[meta_anns_df['cui'].isin(concept_filter)].reset_index(drop=True)
meta_anns_df.to_excel(writer, index=True, sheet_name='meta_annotations_summary')
else:
with pd.ExcelWriter(path) as writer:
print('Generating report...')
df = pd.DataFrame.from_dict([self.cat.get_model_card(as_dict=True)]).T.reset_index(drop=False) # type: ignore
df.columns = ['MCT report', f'Generated on {date.today().strftime("%Y/%m/%d")}'] # type: ignore
df.to_excel(writer, index=False, sheet_name='medcat_model_card')
self.user_stats().to_excel(writer, index=False, sheet_name='user_stats')
print('Evaluating annotations...')
if meta_ann:
self.full_annotation_df().to_excel(writer, index=False, sheet_name='annotations')
else:
self.annotation_df().to_excel(writer, index=False, sheet_name='annotations')
self.concept_summary().to_excel(writer, index=False, sheet_name='concept_summary')
if meta_ann:
print('Evaluating meta_annotations...')
self.meta_anns_concept_summary().to_excel(writer, index=True, sheet_name='meta_annotations_summary')

return print(f"MCT report saved to: {path}")

0 comments on commit 1d50c06

Please sign in to comment.