Skip to content

Commit

Permalink
Support new version of Racon
Browse files Browse the repository at this point in the history
  • Loading branch information
rrwick committed May 28, 2018
1 parent 4e3fb88 commit 434cead
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 12 deletions.
35 changes: 28 additions & 7 deletions unicycler/miniasm_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import itertools
import collections
from .misc import green, red, line_iterator, print_table, int_to_str, float_to_str, \
reverse_complement, gfa_path
reverse_complement, gfa_path, racon_version
from .minimap_alignment import align_long_reads_to_assembly_graph, range_overlap_size, \
load_minimap_alignments
from .string_graph import StringGraph, StringGraphSegment, \
Expand Down Expand Up @@ -334,6 +334,9 @@ def polish_unitigs_with_racon(unitig_graph, miniasm_dir, read_dict, graph, racon
else: # Hybrid assembly
racon_loop_count = settings.RACON_POLISH_LOOP_COUNT_HYBRID

# The Racon command will be different for older versions of Racon.
old_racon_version = (racon_version(racon_path) == '-')

for polish_round_count in range(racon_loop_count):

mappings_filename = os.path.join(polish_dir, ('%03d' % next(counter)) + '_alignments.paf')
Expand Down Expand Up @@ -366,16 +369,33 @@ def polish_unitigs_with_racon(unitig_graph, miniasm_dir, read_dict, graph, racon
break

# Run Racon. It crashes sometimes, so repeat until its return code is 0.
command = [racon_path, '--verbose', '9', '-t', str(threads), '--bq', '-1',
polish_reads, mappings_filename, current_fasta, polished_fasta]
return_code = 1
for t in range(100): # Only try a fixed number of times, to prevent an infinite loop.

# The old version of Racon takes the output file (polished fasta) as an argument.
if old_racon_version:
command = [racon_path, '--verbose', '9', '-t', str(threads), '--bq', '-1',
polish_reads, mappings_filename, current_fasta, polished_fasta]

# The new version of Racon outputs the polished fasta to stdout.
else:
command = [racon_path, '-t', str(threads), polish_reads, mappings_filename,
current_fasta]

process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = process.communicate()
with open(racon_log, 'wb') as log_file:
log_file.write(out)
log_file.write(err)
return_code = process.returncode

if old_racon_version:
with open(racon_log, 'wb') as log_file:
log_file.write(out)
log_file.write(err)
else:
with open(racon_log, 'wb') as log_file:
log_file.write(err)
with open(polished_fasta, 'wb') as out_file:
out_file.write(out)

if return_code == 0 and os.path.isfile(polished_fasta):
break
if os.path.isfile(polished_fasta):
Expand All @@ -387,7 +407,8 @@ def polish_unitigs_with_racon(unitig_graph, miniasm_dir, read_dict, graph, racon
if return_code != 0 or not os.path.isfile(polished_fasta):
break

unitig_graph.replace_with_polished_sequences(polished_fasta, scoring_scheme)
unitig_graph.replace_with_polished_sequences(polished_fasta, scoring_scheme,
old_racon_version)
unitig_graph.save_to_fasta(fixed_fasta)
unitig_graph.rotate_circular_sequences()
unitig_graph.save_to_fasta(rotated_fasta)
Expand Down
17 changes: 14 additions & 3 deletions unicycler/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,11 +941,22 @@ def racon_path_and_version(racon_path):
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
out, _ = process.communicate()
out = out.decode().lower()
if 'racon' in out and 'options' in out:
return found_racon_path, '-', 'good'
else:
if 'racon' not in out or 'options' not in out:
return found_racon_path, '-', 'bad'

return found_racon_path, racon_version(found_racon_path), 'good'


def racon_version(found_racon_path):
    """
    Returns the version string reported by the Racon executable at the given path.

    Racon is run with '--version' and its combined stdout/stderr is examined. If the output
    starts with 'v' (e.g. 'v1.3.1'), the text after the 'v' is returned (e.g. '1.3.1'). Any other
    output is treated as coming from a Racon build without a usable version flag, and '-' is
    returned instead.
    """
    command = [found_racon_path, '--version']
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    out, _ = process.communicate()

    # The tool terminates its output with a newline; strip surrounding whitespace so it does not
    # end up inside the returned version string.
    out = out.decode().lower().strip()
    if out.startswith('v'):
        return out[1:]
    return '-'


def makeblastdb_path_and_version(makeblastdb_path):
found_makeblastdb_path = shutil.which(makeblastdb_path)
Expand Down
10 changes: 8 additions & 2 deletions unicycler/string_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,14 +426,20 @@ def get_connected_segments(self, seg_name):
connected_segments.add(get_unsigned_seg_name(segment))
return list(connected_segments)

def replace_with_polished_sequences(self, polished_fasta, scoring_scheme):
def replace_with_polished_sequences(self, polished_fasta, scoring_scheme, old_racon_version):
"""
Swaps out the current sequences with polished versions from Racon.
"""
polished_seqs = load_fasta(polished_fasta)
for seg_name, segment in self.segments.items():
try:
polished_seq = [x[1] for x in polished_seqs if 'Consensus_' + seg_name == x[0]][0]
# Old versions of Racon put 'Consensus_' on the front of contig names, but new
# versions don't.
if old_racon_version:
polished_seq = [x[1] for x in polished_seqs
if 'Consensus_' + seg_name == x[0]][0]
else:
polished_seq = [x[1] for x in polished_seqs if seg_name == x[0]][0]

# Racon sometimes drops the start or end of sequences, so we do some semi-global
# alignments to see if bases have been lost. If so, we put them back!
Expand Down

0 comments on commit 434cead

Please sign in to comment.