diff --git a/cat_merge/file_utils.py b/cat_merge/file_utils.py
index 16ce761..473b500 100644
--- a/cat_merge/file_utils.py
+++ b/cat_merge/file_utils.py
@@ -237,3 +237,27 @@ def write(kg: MergedKG, name: str, output_dir: str):
     write_df(df=kg.edges, filename=edges_path)
 
     write_tar(tar_path, [nodes_path, edges_path])
+
+
+def write_qc(qc: MergeQC, name: str, output_dir: str):
+    """Write the QC tables from a merge into ``{output_dir}/qc``.
+
+    Args:
+        qc: QC result whose ``duplicate_nodes`` and ``dangling_edges``
+            attributes are each handed to ``write_df``.
+        name: Prefix shared by both output file names.
+        output_dir: Base directory; files are placed in its ``qc`` subdirectory.
+    """
+    # Function-scope import: pathlib may not be imported at module level.
+    from pathlib import Path
+
+    # Create the target directory up front so this function does not rely on
+    # an earlier writer having made it.
+    qc_dir = f"{output_dir}/qc"
+    Path(qc_dir).mkdir(parents=True, exist_ok=True)
+
+    duplicate_nodes_path = f"{qc_dir}/{name}-duplicate-nodes.tsv.gz"
+    dangling_edges_path = f"{qc_dir}/{name}-dangling-edges.tsv.gz"
+
+    write_df(df=qc.duplicate_nodes, filename=duplicate_nodes_path)
+    write_df(df=qc.dangling_edges, filename=dangling_edges_path)
diff --git a/cat_merge/merge.py b/cat_merge/merge.py
index d862f06..3e4447a 100644
--- a/cat_merge/merge.py
+++ b/cat_merge/merge.py
@@ -5,7 +5,7 @@ import yaml
 
 import logging
 
-from cat_merge.file_utils import read_dfs, read_tar_dfs, get_files, write
+from cat_merge.file_utils import read_dfs, read_tar_dfs, get_files, write, write_qc
 from cat_merge.merge_utils import merge_kg
 from cat_merge.qc_utils import create_qc_report
 
@@ -82,6 +82,8 @@ def merge(
         output_dir=output_dir
     )
 
+    write_qc(name=name, qc=qc, output_dir=output_dir)
+
     if qc_report:
         print("Generating QC report")
         qc_report = create_qc_report(kg, qc)