Skip to content

Commit

Permalink
[Dashboard] Split # invalid terms in temp and terms from other ont for AS and CT (#306)
Browse files Browse the repository at this point in the history

* add total to terms and relationships dashboard

* add # temp and # other ont terms for AS and CT

---------

Co-authored-by: Anita Caron <[email protected]>
  • Loading branch information
Anita Caron and anitacaron authored Feb 2, 2024
1 parent 985256e commit 855edb3
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 5 deletions.
61 changes: 59 additions & 2 deletions src/ccf_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ def no_parent(log_dict, cell_type, row_number):

asct_b_tab = json.load(open(path))
as_invalid_terms = set()
as_temp_terms = set()
as_out_ub = set()
ct_invalid_terms = set()
ct_temp_terms = set()
ct_out_ct = set()
unique_terms = set()
as_valid_terms = set()
ct_valid_terms = set()
Expand Down Expand Up @@ -87,16 +91,32 @@ def no_parent(log_dict, cell_type, row_number):
as_valid_terms.add(next['id'])
else:
if not check_id(current['id']) and current['rdfs_label'] != '':
if current['id'] != '':
as_out_ub.add(current['id'])
else:
as_temp_terms.add(current['rdfs_label'])
as_invalid_terms.add(current['rdfs_label'])
unique_terms.add(current['rdfs_label'])
elif not check_id(current['id']) and current['name'] != '':
if current['id'] != '':
as_out_ub.add(current['id'])
else:
as_temp_terms.add(current['name'])
as_invalid_terms.add(current['name'])
unique_terms.add(current['name'])

if not check_id(next['id']) and next['rdfs_label'] != '':
if next['id'] != '':
as_out_ub.add(next['id'])
else:
as_temp_terms.add(next['rdfs_label'])
as_invalid_terms.add(next['rdfs_label'])
unique_terms.add(next['rdfs_label'])
elif not check_id(next['id']) and next['name'] != '':
if next['id'] != '':
as_out_ub.add(next['id'])
else:
as_temp_terms.add(next['name'])
as_invalid_terms.add(next['name'])
unique_terms.add(next['name'])

Expand Down Expand Up @@ -125,16 +145,32 @@ def no_parent(log_dict, cell_type, row_number):
ct_valid_terms.add(next['id'])
else:
if not check_id(current['id']) and current['rdfs_label'] != '':
if current['id'] != '':
ct_out_ct.add(current['id'])
else:
ct_temp_terms.add(current['rdfs_label'])
ct_invalid_terms.add(current['rdfs_label'])
unique_terms.add(current['rdfs_label'])
elif not check_id(current['id']) and current['name'] != '':
if current['id'] != '':
ct_out_ct.add(current['id'])
else:
ct_temp_terms.add(current['name'])
ct_invalid_terms.add(current['name'])
unique_terms.add(current['name'])

if not check_id(next['id']) and next['rdfs_label'] != '':
if next['id'] != '':
ct_out_ct.add(next['id'])
else:
ct_temp_terms.add(next['rdfs_label'])
ct_invalid_terms.add(next['rdfs_label'])
unique_terms.add(next['rdfs_label'])
elif not check_id(next['id']) and next['name'] != '':
if next['id'] != '':
ct_out_ct.add(next['id'])
else:
ct_temp_terms.add(next['name'])
ct_invalid_terms.add(next['name'])
unique_terms.add(next['name'])

Expand Down Expand Up @@ -171,16 +207,32 @@ def no_parent(log_dict, cell_type, row_number):
if check_id(last_ct['id']):
ct_valid_terms.add(last_ct['id'])
if not check_id(last_as['id']) and last_as['rdfs_label'] != '':
if last_as['id'] != '':
as_out_ub.add(last_as['id'])
else:
as_temp_terms.add(last_as['rdfs_label'])
as_invalid_terms.add(last_as['rdfs_label'])
unique_terms.add(last_as['rdfs_label'])
elif not check_id(last_as['id']) and last_as['name'] != '':
if last_as['id'] != '':
as_out_ub.add(last_as['id'])
else:
as_temp_terms.add(last_as['name'])
as_invalid_terms.add(last_as['name'])
unique_terms.add(last_as['name'])

if not check_id(last_ct['id']) and last_ct['rdfs_label'] != '':
if last_ct['id'] != '':
ct_out_ct.add(last_ct['id'])
else:
ct_temp_terms.add(last_ct['rdfs_label'])
ct_invalid_terms.add(last_ct['rdfs_label'])
unique_terms.add(last_ct['rdfs_label'])
elif not check_id(last_ct['id']) and last_ct['name'] != '':
if last_ct['id'] != '':
ct_out_ct.add(last_ct['id'])
else:
ct_temp_terms.add(last_ct['name'])
ct_invalid_terms.add(last_ct['name'])
unique_terms.add(last_ct['name'])

Expand Down Expand Up @@ -218,6 +270,7 @@ def no_parent(log_dict, cell_type, row_number):

# Percentages default to 0 so that tables with no terms of a kind still
# produce a report row.
as_invalid_term_percent = 0
ct_invalid_terms_percent = 0
invalid_terms_percent = 0
# Guard on the same quantity used as the denominator below (valid + invalid
# AS terms). Checking the CT invalid set here would allow a division by zero
# when a table has CT invalid terms but no AS terms at all.
if len(as_valid_terms) + len(as_invalid_terms) > 0:
    as_invalid_term_percent = round((len(as_invalid_terms)*100)/(len(as_valid_terms)+len(as_invalid_terms)), 2)
if len(ct_valid_terms) + len(ct_invalid_terms) > 0:
Expand All @@ -227,10 +280,14 @@ def no_parent(log_dict, cell_type, row_number):

report_terms = {
'Table': '',
'AS_valid_term_number': [len(as_valid_terms)],
'AS_invalid_term_number': [len(as_invalid_terms)],
'AS_valid_term_number': [len(as_valid_terms)],
'AS_temp_term_number': [len(as_temp_terms)],
'AS_out_ub': [len(as_out_ub)],
'AS_invalid_term_number': [len(as_invalid_terms)],
'AS_invalid_term_percent': [as_invalid_term_percent],
'CT_valid_term_number': [len(ct_valid_terms)],
'CT_temp_term_number': [len(ct_temp_terms)],
'CT_out_ub': [len(ct_out_ct)],
'CT_invalid_term_number': [len(ct_invalid_terms)],
'CT_invalid_term_percent': [ct_invalid_terms_percent],
'invalid_terms_percent': [invalid_terms_percent]
Expand Down
21 changes: 18 additions & 3 deletions src/dashboard_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ def clean_up(report):
def add_link(report):
    """Turn table names in the ``Table`` column into Markdown links.

    Each row whose ``Table`` value is not ``"Total"`` is rewritten in place
    to ``[<name>](<name>/README.md)``. A row labelled ``"Total"`` is a
    summary row rather than a table, so its label is left untouched.

    Args:
        report: pandas DataFrame with a ``Table`` column.

    Returns:
        The same DataFrame object, modified in place.
    """
    for row in report.itertuples():
        row_table = row.Table
        # Only real tables get a link; the summary row stays plain text.
        if row_table != "Total":
            report.at[row.Index, "Table"] = f"[{row_table}]({row_table}/README.md)"

    return report

Expand Down Expand Up @@ -50,24 +51,38 @@ def add_color(report, report_type):

def get_reports(date):
BASE_PATH = "../reports/report_"

ter_report = pd.read_csv(f"{BASE_PATH}terms_{date}.tsv", sep='\t')
ter_report.sort_values(by=["Table"], inplace=True)
ter_report.loc["Total"] = ter_report.sum()
ter_report.loc[ter_report.index[-1], "Table"] = "Total"
ter_report.loc[ter_report.index[-1], "AS_invalid_term_percent"] = ""
ter_report.loc[ter_report.index[-1], "CT_invalid_term_percent"] = ""
ter_report.loc[ter_report.index[-1], "invalid_terms_percent"] = ""
ter_report = add_color(ter_report.reset_index(drop=True), "terms")
ter_report.rename(columns={
"AS_valid_term_number": "# VALID AS TERMS",
"AS_temp_term_number": "# AS TEMP TERMS",
"AS_out_ub": "# AS NOT UBERON TERMS",
"AS_invalid_term_number": "# INVALID AS TERMS",
"AS_invalid_term_percent": "% INVALID AS TERMS",
"CT_valid_term_number": "# VALID CT TERMS",
"CT_temp_term_number": "# CT TEMP TERMS",
"CT_out_ub": "# CT NOT CL TERMS",
"CT_invalid_term_number": "# INVALID CT TERMS",
"CT_invalid_term_percent": "% INVALID CT TERMS",
"invalid_terms_percent": "% INVALID TERMS"
}, inplace=True)
ter_report = add_link(ter_report)
ter_report_md = tsv2md(ter_report)

rel_report = pd.read_csv(f"{BASE_PATH}relationship_{date}.tsv", sep='\t')
rel_report.sort_values(by=["Table"], inplace=True)
rel_report.loc["Total"] = rel_report.sum()
rel_report.loc[rel_report.index[-1], "Table"] = "Total"
rel_report.loc[rel_report.index[-1], "percent_invalid_AS-AS_relationship"] = ""
rel_report.loc[rel_report.index[-1], "percent_invalid_CT-CT_relationship"] = ""
rel_report.loc[rel_report.index[-1], "percent_invalid_CT-AS_relationship"] = ""
rel_report = clean_up(rel_report.reset_index(drop=True))
rel_report = add_color(rel_report, "relations")
rel_report.rename(columns={
Expand Down

0 comments on commit 855edb3

Please sign in to comment.