From 17925e95c7be95ae7c887b743e098f64eadd6519 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Thu, 18 Jan 2024 16:40:35 +0000 Subject: [PATCH 1/2] force use of report file to be used --- bin/extract_taxa_from_reads.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/bin/extract_taxa_from_reads.py b/bin/extract_taxa_from_reads.py index a87f849..6d27270 100755 --- a/bin/extract_taxa_from_reads.py +++ b/bin/extract_taxa_from_reads.py @@ -34,10 +34,8 @@ def median(l): return l[i] -def load_from_taxonomy(taxonomy_dir): +def load_from_taxonomy(taxonomy_dir, parents, children): taxonomy = os.path.join(taxonomy_dir, "nodes.dmp") - parents = {} - children = defaultdict(set) try: with open(taxonomy, "r") as f: for line in f: @@ -63,9 +61,7 @@ def parse_depth(name): return depth -def infer_hierarchy(report_file): - parents = {} - children = defaultdict(set) +def infer_hierarchy(report_file, parents, children): hierarchy = [] with open(report_file, "r") as f: for line in f: @@ -97,7 +93,8 @@ def infer_hierarchy(report_file): if len(hierarchy) > 1: parent = hierarchy[-2] - parents[ncbi] = parent + if ncbi not in parents: + parents[ncbi] = parent children[parent].add(ncbi) return parents, children @@ -564,11 +561,11 @@ def main(): target_ranks = [] sys.stderr.write("Loading hierarchy\n") - parent, children = None, None + parent = {} + children = defaultdict(set) if args.taxonomy: - parent, children = load_from_taxonomy(args.taxonomy) - else: - parent, children = infer_hierarchy(args.report_file) + parent, children = load_from_taxonomy(args.taxonomy, parent, children) + parent, children = infer_hierarchy(args.report_file, parent, children) # get taxids to extract sys.stderr.write("Loading kreport\n") From 73f0f778263883aeac0912ec8c1f947fd4b4f968 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Thu, 18 Jan 2024 16:50:09 +0000 Subject: [PATCH 2/2] dont add 0 reads to summary --- bin/extract_taxa_from_reads.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/extract_taxa_from_reads.py b/bin/extract_taxa_from_reads.py index 6d27270..5ee2221 100755 --- a/bin/extract_taxa_from_reads.py +++ b/bin/extract_taxa_from_reads.py @@ -381,6 +381,9 @@ def extract_taxa( sys.stderr.write("Write summary\n") summary = [] for taxon in lists_to_extract: + if out_counts[taxon] == 0: + sys.stderr.write("No reads extracted for taxid %s\n" %taxon) + continue if reads2: summary.append( {