diff --git a/phables/workflow/scripts/phables_utils/genome_utils.py b/phables/workflow/scripts/phables_utils/genome_utils.py index 7f580d3..745fcea 100644 --- a/phables/workflow/scripts/phables_utils/genome_utils.py +++ b/phables/workflow/scripts/phables_utils/genome_utils.py @@ -1,7 +1,7 @@ # Class for genome path class GenomePath: def __init__( - self, id, bubble_case, node_order, node_id_order, path, coverage, length, gc + self, id, bubble_case, node_order, node_order_human, node_id_order, path, coverage, length, gc ): self.id = id self.bubble_case = bubble_case @@ -9,6 +9,7 @@ def __init__( self.coverage = coverage self.length = length self.node_order = node_order + self.node_order_human = node_order_human self.node_id_order = node_id_order self.gc = gc diff --git a/phables/workflow/scripts/phables_utils/long_utils.py b/phables/workflow/scripts/phables_utils/long_utils.py index cc01ca3..a21f43e 100644 --- a/phables/workflow/scripts/phables_utils/long_utils.py +++ b/phables/workflow/scripts/phables_utils/long_utils.py @@ -190,6 +190,7 @@ def resolve_long( f"{unitig_name}+", f"{repeat_unitig_name}-", ], + node_order_human=f"{repeat_unitig_name}:fwd,{unitig_name}:fwd,{repeat_unitig_name}:rev", node_id_order=[ repeat_unitig, unitig_to_consider, @@ -279,9 +280,13 @@ def resolve_long( f"Terminal repeat detected is {repeat_unitig_name}" ) + # Format path node order path_with_repeats = [f"{unitig_name}+"] + [ f"{repeat_unitig_name}+" for x in range(repeat_count) ] + + repeat_order = f"{repeat_unitig_name}:fwd," * repeat_count + path_with_repeats_human = f"{unitig_name}:fwd,{repeat_order[:-1]}" node_id_order_with_repeats = [unitig_to_consider] + [ repeat_unitig for x in range(repeat_count) ] @@ -290,6 +295,7 @@ def resolve_long( id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}", bubble_case="case2_linear", node_order=path_with_repeats, + node_order_human=path_with_repeats_human, node_id_order=node_id_order_with_repeats, path=path_string, coverage=int(unitig_coverages[unitig_name]), @@ -784,11 +790,23 @@ def resolve_long( previous_edge = node + # Format genomic path + path_node_order_human = "" + + for c in path_order: + if c.endswith("+"): + path_node_order_human += f"{c[:-1]}:fwd," + else: + path_node_order_human += f"{c[:-1]}:rev," + + path_node_order_human = path_node_order_human[:-1] + # Create GenomePath object with path details genome_path = GenomePath( id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}", bubble_case="case3_circular", node_order=[x for x in path_order], + node_order_human=path_node_order_human, node_id_order=[ unitig_names_rev[x[:-1]] for x in path_order @@ -1207,11 +1225,23 @@ def resolve_long( previous_edge = node + # Format genomic path + path_node_order_human = "" + + for c in path_order: + if c.endswith("+"): + path_node_order_human += f"{c[:-1]}:fwd," + else: + path_node_order_human += f"{c[:-1]}:rev," + + path_node_order_human = path_node_order_human[:-1] + # Create GenomePath object with path details genome_path = GenomePath( id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}", bubble_case="case3_linear", node_order=[x for x in path_order], + node_order_human=path_node_order_human, node_id_order=[ unitig_names_rev[x[:-1]] for x in path_order @@ -1275,6 +1305,7 @@ def resolve_long( id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}", bubble_case=case_name, node_order=[unitig_names[candidate_nodes[0]]], + node_order_human=f"{unitig_names[candidate_nodes[0]]}:fwd", node_id_order=[candidate_nodes[0]], path=path_string, coverage=int(unitig_coverages[unitig_name]), diff --git a/phables/workflow/scripts/phables_utils/output_utils.py b/phables/workflow/scripts/phables_utils/output_utils.py index f1d055e..a36888d 100644 --- a/phables/workflow/scripts/phables_utils/output_utils.py +++ b/phables/workflow/scripts/phables_utils/output_utils.py @@ -91,10 +91,10 @@ def write_res_genome_info(all_resolved_paths, output): """ with open(f"{output}/resolved_genome_info.txt", "w") as myfile: - myfile.write(f"Path\tCase\tCoverage\tLength\tGC content\tNode order\n") + myfile.write(f"Path\tCase\tCoverage\tLength\tGC content\tNode order\tNode order human\n") for genomic_path in all_resolved_paths: myfile.write( - f"{genomic_path.id}\t{genomic_path.bubble_case}\t{genomic_path.coverage}\t{genomic_path.length}\t{genomic_path.gc}\t{genomic_path.node_order}\n" + f"{genomic_path.id}\t{genomic_path.bubble_case}\t{genomic_path.coverage}\t{genomic_path.length}\t{genomic_path.gc}\t{genomic_path.node_order}\t{genomic_path.node_order_human}\n" ) return "resolved_genome_info.txt" diff --git a/phables/workflow/scripts/phables_utils/short_utils.py b/phables/workflow/scripts/phables_utils/short_utils.py index 085afe0..a268ba0 100644 --- a/phables/workflow/scripts/phables_utils/short_utils.py +++ b/phables/workflow/scripts/phables_utils/short_utils.py @@ -190,6 +190,7 @@ def resolve_short( f"{unitig_name}+", f"{repeat_unitig_name}-", ], + node_order_human=f"{repeat_unitig_name}:fwd,{unitig_name}:fwd,{repeat_unitig_name}:rev", node_id_order=[ repeat_unitig, unitig_to_consider, @@ -279,9 +280,13 @@ def resolve_short( f"Terminal repeat detected is {repeat_unitig_name}" ) + # Format path node order path_with_repeats = [f"{unitig_name}+"] + [ f"{repeat_unitig_name}+" for x in range(repeat_count) ] + + repeat_order = f"{repeat_unitig_name}:fwd," * repeat_count + path_with_repeats_human = f"{unitig_name}:fwd,{repeat_order[:-1]}" node_id_order_with_repeats = [unitig_to_consider] + [ repeat_unitig for x in range(repeat_count) ] @@ -290,6 +295,7 @@ def resolve_short( id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}", bubble_case="case2_linear", node_order=path_with_repeats, + node_order_human=path_with_repeats_human, node_id_order=node_id_order_with_repeats, path=path_string, coverage=int(unitig_coverages[unitig_name]), @@ -759,11 +765,23 @@ def resolve_short( previous_edge = node + # Format genomic path + path_node_order_human = "" + + for c in path_order: + if c.endswith("+"): + path_node_order_human += f"{c[:-1]}:fwd," + else: + path_node_order_human += f"{c[:-1]}:rev," + + path_node_order_human = path_node_order_human[:-1] + # Create GenomePath object with path details genome_path = GenomePath( id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}", bubble_case="case3_circular", node_order=[x for x in path_order], + node_order_human=path_node_order_human, node_id_order=[ unitig_names_rev[x[:-1]] for x in path_order @@ -1157,11 +1175,23 @@ def resolve_short( previous_edge = node + # Format genomic path + path_node_order_human = "" + + for c in path_order: + if c.endswith("+"): + path_node_order_human += f"{c[:-1]}:fwd," + else: + path_node_order_human += f"{c[:-1]}:rev," + + path_node_order_human = path_node_order_human[:-1] + # Create GenomePath object with path details genome_path = GenomePath( id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}", bubble_case="case3_linear", node_order=[x for x in path_order], + node_order_human=path_node_order_human, node_id_order=[ unitig_names_rev[x[:-1]] for x in path_order @@ -1225,6 +1255,7 @@ def resolve_short( id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}", bubble_case=case_name, node_order=[unitig_names[candidate_nodes[0]]], + node_order_human=f"{unitig_names[candidate_nodes[0]]}:fwd", node_id_order=[candidate_nodes[0]], path=path_string, coverage=int(unitig_coverages[unitig_name]),