Skip to content

Commit

Permalink
MAINT: code formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
Vini2 committed Dec 13, 2023
1 parent 9c41851 commit c489559
Showing 1 changed file with 82 additions and 53 deletions.
135 changes: 82 additions & 53 deletions phables/workflow/scripts/phables.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,10 @@ def main():
all_self_looped = False
one_circular = False

if unitig_names[candidate_nodes[0]] in self_looped_nodes and unitig_names[candidate_nodes[1]] in self_looped_nodes:
if (
unitig_names[candidate_nodes[0]] in self_looped_nodes
and unitig_names[candidate_nodes[1]] in self_looped_nodes
):
all_self_looped = True
else:
if unitig_names[candidate_nodes[0]] in self_looped_nodes:
Expand Down Expand Up @@ -246,15 +249,16 @@ def main():
unitig2_len = len(str(graph_unitigs[unitig2_name]))

if unitig1 != "" and unitig2 != "":

# Case 2 - both are circular
if all_self_looped:
case2_found.add(my_count)

cycle_components.add(my_count)

phage_like_edges = phage_like_edges.union(set(candidate_nodes))
comp_resolved_edges = comp_resolved_edges.union(set(candidate_nodes))
comp_resolved_edges = comp_resolved_edges.union(
set(candidate_nodes)
)

unitig_to_consider = -1
unitig_name = ""
Expand Down Expand Up @@ -298,18 +302,22 @@ def main():
)

genome_path = GenomePath(
id = f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case = "case2_circular",
node_order = [
id=f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case2_circular",
node_order=[
f"{repeat_unitig_name}+",
f"{unitig_name}+",
f"{repeat_unitig_name}-",
],
node_id_order = [repeat_unitig, unitig_to_consider, repeat_unitig],
path = path_string,
coverage = int(unitig_coverages[unitig_name]),
length = len(path_string),
gc = (path_string.count("G") + path_string.count("C"))
node_id_order=[
repeat_unitig,
unitig_to_consider,
repeat_unitig,
],
path=path_string,
coverage=int(unitig_coverages[unitig_name]),
length=len(path_string),
gc=(path_string.count("G") + path_string.count("C"))
/ len(path_string)
* 100,
)
Expand All @@ -320,26 +328,35 @@ def main():

# Case 2 - only one is circular
elif one_circular:

case2_found.add(my_count)

cycle_components.add(my_count)

phage_like_edges = phage_like_edges.union(set(candidate_nodes))
comp_resolved_edges = comp_resolved_edges.union(set(candidate_nodes))

comp_resolved_edges = comp_resolved_edges.union(
set(candidate_nodes)
)

unitig_to_consider = -1
unitig_name = ""

repeat_unitig = -1
repeat_unitig_name = ""

if unitig1_len > unitig2_len and unitig1_len > minlength and unitig2_name in self_looped_nodes:
if (
unitig1_len > unitig2_len
and unitig1_len > minlength
and unitig2_name in self_looped_nodes
):
unitig_to_consider = unitig1
unitig_name = unitig1_name
repeat_unitig = unitig2
repeat_unitig_name = unitig2_name
elif unitig2_len > unitig1_len and unitig2_len > minlength and unitig1_name in self_looped_nodes:
elif (
unitig2_len > unitig1_len
and unitig2_len > minlength
and unitig1_name in self_looped_nodes
):
unitig_to_consider = unitig2
unitig_name = unitig2_name
repeat_unitig = unitig1
Expand All @@ -355,7 +372,13 @@ def main():

# Get repeat count

repeat_count = max(int(unitig_coverages[repeat_unitig_name]/unitig_coverages[unitig_name]), 1)
repeat_count = max(
int(
unitig_coverages[repeat_unitig_name]
/ unitig_coverages[unitig_name]
),
1,
)
logger.debug(f"Repeat count: {repeat_count}")

path_string = (
Expand All @@ -368,31 +391,37 @@ def main():
graph_unitigs[repeat_unitig_name][
link_overlap[(unitig_to_consider, repeat_unitig)] :
]
) * repeat_count
)
* repeat_count
)
logger.debug(
f"Terminal repeat detected is {repeat_unitig_name}"
)

path_with_repeats = [f"{unitig_name}+"] + [f"{repeat_unitig_name}+" for x in range(repeat_count)]
node_id_order_with_repeats = [unitig_to_consider] + [repeat_unitig for x in range(repeat_count)]
path_with_repeats = [f"{unitig_name}+"] + [
f"{repeat_unitig_name}+" for x in range(repeat_count)
]
node_id_order_with_repeats = [unitig_to_consider] + [
repeat_unitig for x in range(repeat_count)
]

genome_path = GenomePath(
id = f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case = "case2_linear",
node_order = path_with_repeats,
node_id_order = node_id_order_with_repeats,
path = path_string,
coverage = int(unitig_coverages[unitig_name]),
length = len(path_string),
gc = (path_string.count("G") + path_string.count("C")) / len(path_string) * 100,
id=f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case2_linear",
node_order=path_with_repeats,
node_id_order=node_id_order_with_repeats,
path=path_string,
coverage=int(unitig_coverages[unitig_name]),
length=len(path_string),
gc=(path_string.count("G") + path_string.count("C"))
/ len(path_string)
* 100,
)
my_genomic_paths.append(genome_path)
resolved_components.add(my_count)
resolved_cyclic.add(my_count)
case2_resolved.add(my_count)


# Case 3 components
elif len(candidate_nodes) > 2 and len(candidate_nodes) <= compcount:
# Create initial directed graph with coverage values
Expand Down Expand Up @@ -851,17 +880,17 @@ def main():

# Create GenomePath object with path details
genome_path = GenomePath(
id = f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case = "case3_circular",
node_order = [x for x in path_order],
node_id_order = [
id=f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case3_circular",
node_order=[x for x in path_order],
node_id_order=[
unitig_names_rev[x[:-1]]
for x in path_order
],
path = path_string,
coverage = int(coverage_val),
length = total_length,
gc = (
path=path_string,
coverage=int(coverage_val),
length=total_length,
gc=(
path_string.count("G")
+ path_string.count("C")
)
Expand Down Expand Up @@ -1249,17 +1278,17 @@ def main():

# Create GenomePath object with path details
genome_path = GenomePath(
id = f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case = "case3_linear",
node_order = [x for x in path_order],
node_id_order = [
id=f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case="case3_linear",
node_order=[x for x in path_order],
node_id_order=[
unitig_names_rev[x[:-1]]
for x in path_order
],
path = path_string,
coverage = int(coverage_val),
length = total_length,
gc = (
path=path_string,
coverage=int(coverage_val),
length=total_length,
gc=(
path_string.count("G")
+ path_string.count("C")
)
Expand Down Expand Up @@ -1312,14 +1341,14 @@ def main():

# Create GenomePath object with path details
genome_path = GenomePath(
id = f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case = case_name,
node_order = [unitig_names[candidate_nodes[0]]],
node_id_order = [candidate_nodes[0]],
path = path_string,
coverage = int(unitig_coverages[unitig_name]),
length = len(graph_unitigs[unitig_name]),
gc = (path_string.count("G") + path_string.count("C"))
id=f"phage_comp_{my_count}_cycle_{cycle_number}",
bubble_case=case_name,
node_order=[unitig_names[candidate_nodes[0]]],
node_id_order=[candidate_nodes[0]],
path=path_string,
coverage=int(unitig_coverages[unitig_name]),
length=len(graph_unitigs[unitig_name]),
gc=(path_string.count("G") + path_string.count("C"))
/ len(path_string)
* 100,
)
Expand Down

0 comments on commit c489559

Please sign in to comment.