Skip to content

Commit

Permalink
DEV: Updated genome path format in output to fix #41
Browse files (browse the repository at this point in the history)
  • Loading branch information
Vini2 committed Feb 22, 2024
1 parent 10a5b23 commit e73d6eb
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 3 deletions.
3 changes: 2 additions & 1 deletion phables/workflow/scripts/phables_utils/genome_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# Class for genome path
class GenomePath:
def __init__(
self, id, bubble_case, node_order, node_id_order, path, coverage, length, gc
self, id, bubble_case, node_order, node_order_human, node_id_order, path, coverage, length, gc
):
self.id = id
self.bubble_case = bubble_case
self.path = path
self.coverage = coverage
self.length = length
self.node_order = node_order
self.node_order_human = node_order_human
self.node_id_order = node_id_order
self.gc = gc

Expand Down
31 changes: 31 additions & 0 deletions phables/workflow/scripts/phables_utils/long_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def resolve_long(
f"{unitig_name}+",
f"{repeat_unitig_name}-",
],
node_order_human=f"{repeat_unitig_name}:fwd,{unitig_name}:fwd,{repeat_unitig_name}:rev",
node_id_order=[
repeat_unitig,
unitig_to_consider,
Expand Down Expand Up @@ -279,9 +280,13 @@ def resolve_long(
f"Terminal repeat detected is {repeat_unitig_name}"
)

# Format path node order
path_with_repeats = [f"{unitig_name}+"] + [
f"{repeat_unitig_name}+" for x in range(repeat_count)
]

repeat_order = f"{repeat_unitig_name}:fwd," * repeat_count
path_with_repeats_human = f"{unitig_name}:fwd,{repeat_order[:-1]}"
node_id_order_with_repeats = [unitig_to_consider] + [
repeat_unitig for x in range(repeat_count)
]
Expand All @@ -290,6 +295,7 @@ def resolve_long(
id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case2_linear",
node_order=path_with_repeats,
node_order_human=path_with_repeats_human,
node_id_order=node_id_order_with_repeats,
path=path_string,
coverage=int(unitig_coverages[unitig_name]),
Expand Down Expand Up @@ -784,11 +790,23 @@ def resolve_long(

previous_edge = node

# Format genomic path
path_node_order_human = ""

for c in path_order:
if c.endswith("+"):
path_node_order_human += f"{c[:-1]}:fwd,"
else:
path_node_order_human += f"{c[:-1]}:rev,"

path_node_order_human = path_node_order_human[:-1]

# Create GenomePath object with path details
genome_path = GenomePath(
id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case3_circular",
node_order=[x for x in path_order],
node_order_human=path_node_order_human,
node_id_order=[
unitig_names_rev[x[:-1]]
for x in path_order
Expand Down Expand Up @@ -1207,11 +1225,23 @@ def resolve_long(

previous_edge = node

# Format genomic path
path_node_order_human = ""

for c in path_order:
if c.endswith("+"):
path_node_order_human += f"{c[:-1]}:fwd,"
else:
path_node_order_human += f"{c[:-1]}:rev,"

path_node_order_human = path_node_order_human[:-1]

# Create GenomePath object with path details
genome_path = GenomePath(
id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case3_linear",
node_order=[x for x in path_order],
node_order_human=path_node_order_human,
node_id_order=[
unitig_names_rev[x[:-1]]
for x in path_order
Expand Down Expand Up @@ -1275,6 +1305,7 @@ def resolve_long(
id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case=case_name,
node_order=[unitig_names[candidate_nodes[0]]],
node_order_human=f"{unitig_names[candidate_nodes[0]]}:fwd",
node_id_order=[candidate_nodes[0]],
path=path_string,
coverage=int(unitig_coverages[unitig_name]),
Expand Down
4 changes: 2 additions & 2 deletions phables/workflow/scripts/phables_utils/output_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,10 @@ def write_res_genome_info(all_resolved_paths, output):
"""

with open(f"{output}/resolved_genome_info.txt", "w") as myfile:
myfile.write(f"Path\tCase\tCoverage\tLength\tGC content\tNode order\n")
myfile.write(f"Path\tCase\tCoverage\tLength\tGC content\tNode order\tNode order human\n")
for genomic_path in all_resolved_paths:
myfile.write(
f"{genomic_path.id}\t{genomic_path.bubble_case}\t{genomic_path.coverage}\t{genomic_path.length}\t{genomic_path.gc}\t{genomic_path.node_order}\n"
f"{genomic_path.id}\t{genomic_path.bubble_case}\t{genomic_path.coverage}\t{genomic_path.length}\t{genomic_path.gc}\t{genomic_path.node_order}\t{genomic_path.node_order_human}\n"
)

return "resolved_genome_info.txt"
Expand Down
31 changes: 31 additions & 0 deletions phables/workflow/scripts/phables_utils/short_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def resolve_short(
f"{unitig_name}+",
f"{repeat_unitig_name}-",
],
node_order_human=f"{repeat_unitig_name}:fwd,{unitig_name}:fwd,{repeat_unitig_name}:rev",
node_id_order=[
repeat_unitig,
unitig_to_consider,
Expand Down Expand Up @@ -279,9 +280,13 @@ def resolve_short(
f"Terminal repeat detected is {repeat_unitig_name}"
)

# Format path node order
path_with_repeats = [f"{unitig_name}+"] + [
f"{repeat_unitig_name}+" for x in range(repeat_count)
]

repeat_order = f"{repeat_unitig_name}:fwd," * repeat_count
path_with_repeats_human = f"{unitig_name}:fwd,{repeat_order[:-1]}"
node_id_order_with_repeats = [unitig_to_consider] + [
repeat_unitig for x in range(repeat_count)
]
Expand All @@ -290,6 +295,7 @@ def resolve_short(
id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case2_linear",
node_order=path_with_repeats,
node_order_human=path_with_repeats_human,
node_id_order=node_id_order_with_repeats,
path=path_string,
coverage=int(unitig_coverages[unitig_name]),
Expand Down Expand Up @@ -759,11 +765,23 @@ def resolve_short(

previous_edge = node

# Format genomic path
path_node_order_human = ""

for c in path_order:
if c.endswith("+"):
path_node_order_human += f"{c[:-1]}:fwd,"
else:
path_node_order_human += f"{c[:-1]}:rev,"

path_node_order_human = path_node_order_human[:-1]

# Create GenomePath object with path details
genome_path = GenomePath(
id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case3_circular",
node_order=[x for x in path_order],
node_order_human=path_node_order_human,
node_id_order=[
unitig_names_rev[x[:-1]]
for x in path_order
Expand Down Expand Up @@ -1157,11 +1175,23 @@ def resolve_short(

previous_edge = node

# Format genomic path
path_node_order_human = ""

for c in path_order:
if c.endswith("+"):
path_node_order_human += f"{c[:-1]}:fwd,"
else:
path_node_order_human += f"{c[:-1]}:rev,"

path_node_order_human = path_node_order_human[:-1]

# Create GenomePath object with path details
genome_path = GenomePath(
id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case3_linear",
node_order=[x for x in path_order],
node_order_human=path_node_order_human,
node_id_order=[
unitig_names_rev[x[:-1]]
for x in path_order
Expand Down Expand Up @@ -1225,6 +1255,7 @@ def resolve_short(
id=f"{prefix}phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case=case_name,
node_order=[unitig_names[candidate_nodes[0]]],
node_order_human=f"{unitig_names[candidate_nodes[0]]}:fwd",
node_id_order=[candidate_nodes[0]],
path=path_string,
coverage=int(unitig_coverages[unitig_name]),
Expand Down

0 comments on commit e73d6eb

Please sign in to comment.