From 434ceadf48ccaad9ccede2bcec9c4890f937a683 Mon Sep 17 00:00:00 2001 From: Ryan Wick Date: Mon, 28 May 2018 12:49:24 +1000 Subject: [PATCH] Support new version of Racon --- unicycler/miniasm_assembly.py | 35 ++++++++++++++++++++++++++++------- unicycler/misc.py | 17 ++++++++++++++--- unicycler/string_graph.py | 10 ++++++++-- 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/unicycler/miniasm_assembly.py b/unicycler/miniasm_assembly.py index 9d338197..8166ae9e 100644 --- a/unicycler/miniasm_assembly.py +++ b/unicycler/miniasm_assembly.py @@ -21,7 +21,7 @@ import itertools import collections from .misc import green, red, line_iterator, print_table, int_to_str, float_to_str, \ - reverse_complement, gfa_path + reverse_complement, gfa_path, racon_version from .minimap_alignment import align_long_reads_to_assembly_graph, range_overlap_size, \ load_minimap_alignments from .string_graph import StringGraph, StringGraphSegment, \ @@ -334,6 +334,9 @@ def polish_unitigs_with_racon(unitig_graph, miniasm_dir, read_dict, graph, racon else: # Hybrid assembly racon_loop_count = settings.RACON_POLISH_LOOP_COUNT_HYBRID + # The Racon command will be different for older versions of Racon. + old_racon_version = (racon_version(racon_path) == '-') + for polish_round_count in range(racon_loop_count): mappings_filename = os.path.join(polish_dir, ('%03d' % next(counter)) + '_alignments.paf') @@ -366,16 +369,33 @@ def polish_unitigs_with_racon(unitig_graph, miniasm_dir, read_dict, graph, racon break # Run Racon. It crashes sometimes, so repeat until its return code is 0. - command = [racon_path, '--verbose', '9', '-t', str(threads), '--bq', '-1', - polish_reads, mappings_filename, current_fasta, polished_fasta] return_code = 1 for t in range(100): # Only try a fixed number of times, to prevent an infinite loop. + + # The old version of Racon takes the output file (polished fasta) as an argument. + if old_racon_version: + command = [racon_path, '--verbose', '9', '-t', str(threads), '--bq', '-1', + polish_reads, mappings_filename, current_fasta, polished_fasta] + + # The new version of Racon outputs the polished fasta to stdout. + else: + command = [racon_path, '-t', str(threads), polish_reads, mappings_filename, + current_fasta] + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = process.communicate() - with open(racon_log, 'wb') as log_file: - log_file.write(out) - log_file.write(err) return_code = process.returncode + + if old_racon_version: + with open(racon_log, 'wb') as log_file: + log_file.write(out) + log_file.write(err) + else: + with open(racon_log, 'wb') as log_file: + log_file.write(err) + with open(polished_fasta, 'wb') as out_file: + out_file.write(out) + if return_code == 0 and os.path.isfile(polished_fasta): break if os.path.isfile(polished_fasta): @@ -387,7 +407,8 @@ def polish_unitigs_with_racon(unitig_graph, miniasm_dir, read_dict, graph, racon if return_code != 0 or not os.path.isfile(polished_fasta): break - unitig_graph.replace_with_polished_sequences(polished_fasta, scoring_scheme) + unitig_graph.replace_with_polished_sequences(polished_fasta, scoring_scheme, + old_racon_version) unitig_graph.save_to_fasta(fixed_fasta) unitig_graph.rotate_circular_sequences() unitig_graph.save_to_fasta(rotated_fasta) diff --git a/unicycler/misc.py b/unicycler/misc.py index 8311449f..27a7a266 100644 --- a/unicycler/misc.py +++ b/unicycler/misc.py @@ -941,11 +941,22 @@ def racon_path_and_version(racon_path): process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, _ = process.communicate() out = out.decode().lower() - if 'racon' in out and 'options' in out: - return found_racon_path, '-', 'good' - else: + if 'racon' not in out or 'options' not in out: return found_racon_path, '-', 'bad' + return found_racon_path, racon_version(found_racon_path), 'good' + + +def racon_version(found_racon_path): + command = [found_racon_path, '--version'] + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + out, _ = process.communicate() + out = out.decode().lower() + if out.startswith('v'): + return out[1:] + else: + return '-' + def makeblastdb_path_and_version(makeblastdb_path): found_makeblastdb_path = shutil.which(makeblastdb_path) diff --git a/unicycler/string_graph.py b/unicycler/string_graph.py index 4738a755..bc41122c 100644 --- a/unicycler/string_graph.py +++ b/unicycler/string_graph.py @@ -426,14 +426,20 @@ def get_connected_segments(self, seg_name): connected_segments.add(get_unsigned_seg_name(segment)) return list(connected_segments) - def replace_with_polished_sequences(self, polished_fasta, scoring_scheme): + def replace_with_polished_sequences(self, polished_fasta, scoring_scheme, old_racon_version): """ Swaps out the current sequences with polished versions from Racon. """ polished_seqs = load_fasta(polished_fasta) for seg_name, segment in self.segments.items(): try: - polished_seq = [x[1] for x in polished_seqs if 'Consensus_' + seg_name == x[0]][0] + # Old versions of Racon put 'Consensus_' on the front of contig names, but new + # versions don't. + if old_racon_version: + polished_seq = [x[1] for x in polished_seqs + if 'Consensus_' + seg_name == x[0]][0] + else: + polished_seq = [x[1] for x in polished_seqs if seg_name == x[0]][0] # Racon sometimes drops the start or end of sequences, so we do some semi-global # alignments to see if bases have been lost. If so, we put them back!