From fb5c4aed8da527118941e58a1fd85c0d9ddb69dd Mon Sep 17 00:00:00 2001 From: James Gilbert Date: Tue, 26 Nov 2024 14:19:05 +0000 Subject: [PATCH] Fix autosome count check which was counting Unlocs --- src/tola/assembly/assembly_stats.py | 6 ++++-- tests/data/bChlMac1_3/bChlMac1_3-pretext-to-tpf.log | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/tola/assembly/assembly_stats.py b/src/tola/assembly/assembly_stats.py index 8f9d126..207c2b4 100644 --- a/src/tola/assembly/assembly_stats.py +++ b/src/tola/assembly/assembly_stats.py @@ -229,8 +229,10 @@ def check_consistent_autosome_count( ) -> str | None: chr_counts = {} for hap, asm in hap_asm.items(): - if autosomes := [x for x in asm.scaffolds if x.rank == 1]: + ranked_names_lengths = self.get_assembly_scaffold_lengths(hap, asm) + if autosomes := ranked_names_lengths.get(1): chr_counts[hap if hap else "Primary"] = len(autosomes) + if len(chr_counts) > 1: distinct_counts = set(chr_counts.values()) if len(distinct_counts) > 1: @@ -252,7 +254,7 @@ def check_for_large_haplotigs( if hap == "Haplotig": continue ranked_names_lengths = self.get_assembly_scaffold_lengths(hap, asm) - for rank in (1,2): + for rank in (1, 2): if names_lengths := ranked_names_lengths.get(rank): for frags_len in names_lengths.values(): if shortest and frags_len > shortest: diff --git a/tests/data/bChlMac1_3/bChlMac1_3-pretext-to-tpf.log b/tests/data/bChlMac1_3/bChlMac1_3-pretext-to-tpf.log index 6f5762e..c7917ca 100644 --- a/tests/data/bChlMac1_3/bChlMac1_3-pretext-to-tpf.log +++ b/tests/data/bChlMac1_3/bChlMac1_3-pretext-to-tpf.log @@ -50,5 +50,5 @@ bChlMac1_3-pretext-to-tpf.haplotigs 4,748,144 bp total Curation made 1 cut in a contig, 15 breaks at gaps and 29 joins -Mismatch in autosome count between Hap1 = 38 and Hap2 = 37 +Mismatch in autosome count between Hap1 = 36 and Hap2 = 35 Haplotig H_1 (Scaffold_75) is 4,560,100 bp which is longer than the shortest chromosome (292,585 bp)