diff --git a/bin/metaWRAP b/bin/metaWRAP deleted file mode 120000 index 743fbfa..0000000 --- a/bin/metaWRAP +++ /dev/null @@ -1 +0,0 @@ -metawrap \ No newline at end of file diff --git a/bin/metawrap-scripts/add_bins_to_blobplot.py b/bin/metawrap-scripts/add_bins_to_blobplot.py index 60ef998..38f20ec 100755 --- a/bin/metawrap-scripts/add_bins_to_blobplot.py +++ b/bin/metawrap-scripts/add_bins_to_blobplot.py @@ -2,6 +2,7 @@ # THis script takes in a blobplot text file and and number of bins as input, and annotates # each line in the blob file with its bin name (if that contig exists in one of the bins) +from __future__ import print_function import sys, os # load the binned contigs: @@ -19,11 +20,11 @@ if line.split("\t")[0]=="seqid": for i, field in enumerate(line.strip().split("\t")): if field=="taxlevel_phylum": phylum_column=i - print line.strip() + "\tbin\tbinned_yes_no\tbinned_phylum" + print(line.strip() + "\tbin\tbinned_yes_no\tbinned_phylum") elif line.split("\t")[0] in contig_bins: phylum=line.split("\t")[phylum_column] - print "\t".join([line.strip(), contig_bins[line.split("\t")[0]], "Binned", phylum]) + print("\t".join([line.strip(), contig_bins[line.split("\t")[0]], "Binned", phylum])) else: - print "\t".join([line.strip(), "Unbinned", "Unbinned", "Unbinned"]) + print("\t".join([line.strip(), "Unbinned", "Unbinned", "Unbinned"])) diff --git a/bin/metawrap-scripts/binning_refiner.py b/bin/metawrap-scripts/binning_refiner.py index b188328..7c3b459 100755 --- a/bin/metawrap-scripts/binning_refiner.py +++ b/bin/metawrap-scripts/binning_refiner.py @@ -28,6 +28,7 @@ # And the publication: https://www.ncbi.nlm.nih.gov/pubmed/28186226 +from __future__ import print_function import os import glob import shutil diff --git a/bin/metawrap-scripts/blobology/rm_short_contigs.py b/bin/metawrap-scripts/blobology/rm_short_contigs.py index ac7a4bd..21eb08c 100755 --- a/bin/metawrap-scripts/blobology/rm_short_contigs.py +++ b/bin/metawrap-scripts/blobology/rm_short_contigs.py @@ -1,5 +1,6 @@ #! /usr/bin/env python +from __future__ import print_function import sys f=open(sys.argv[1]) @@ -7,6 +8,6 @@ if line.startswith(">"): cut=line.split("_") if int(cut[3])>999: - print line.strip() + print(line.strip()) else: quit - else: print line.strip() + else: print(line.strip()) diff --git a/bin/metawrap-scripts/choose_best_bin.py b/bin/metawrap-scripts/choose_best_bin.py index 4a8b0bc..bce315e 100755 --- a/bin/metawrap-scripts/choose_best_bin.py +++ b/bin/metawrap-scripts/choose_best_bin.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys # This script takes in the reassembled_bins.stats file of the binning module and choses the best possible @@ -34,7 +35,7 @@ for i in best_bins: - print i+'.'+best_bins[i][0] + print(i+'.'+best_bins[i][0]) diff --git a/bin/metawrap-scripts/classify_bins.py b/bin/metawrap-scripts/classify_bins.py index 7ff8032..e57e50e 100755 --- a/bin/metawrap-scripts/classify_bins.py +++ b/bin/metawrap-scripts/classify_bins.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys,os def add_to_tree( tree, tax_list, length ): @@ -60,5 +61,5 @@ def traverse(tree, taxonomy, weight): #print tax_tree consensus=traverse(tax_tree, [], 0) - print filename + "\t" + ";".join(consensus) + print(filename + "\t" + ";".join(consensus)) diff --git a/bin/metawrap-scripts/consolidate_two_sets_of_bins.py b/bin/metawrap-scripts/consolidate_two_sets_of_bins.py index d5b9c28..3c88fae 100755 --- a/bin/metawrap-scripts/consolidate_two_sets_of_bins.py +++ b/bin/metawrap-scripts/consolidate_two_sets_of_bins.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys, os ''' @@ -18,7 +19,7 @@ x=float(sys.argv[7]) # load a list of good bins (>70% complete, <10% contaminated) to save time (wont look at bad bins later on). -print "Loading list of good bins (comp>" + str(c)+ "%, cont<" + str(x) + "%)" +print("Loading list of good bins (comp>" + str(c)+ "%, cont<" + str(x) + "%)") good_bins_1={} good_bins_2={} for line in open(sys.argv[3]): @@ -50,7 +51,7 @@ ''' -print "load in the info about the contigs in each bin..." +print("load in the info about the contigs in each bin...") for bin_file in good_bins_1: bins_1[bin_file]={} contig_len=0 @@ -82,7 +83,7 @@ -print "make all bossible comparisons between the two bin sets, and record total % idential length" +print("make all bossible comparisons between the two bin sets, and record total % idential length") all_bin_pairs={} for bin_1 in good_bins_1: all_bin_pairs[bin_1]={} @@ -107,7 +108,7 @@ all_bin_pairs[bin_1][bin_2]=max([ratio_1, ratio_2]) -print "load in completion and contamination scores of all the bins" +print("load in completion and contamination scores of all the bins") bins_1_stats={} bins_2_stats={} bins_1_summary={} @@ -131,7 +132,7 @@ # go through all good bins and chose best ones -print "go through first group, pull out identical bins from second group, and choose best" +print("go through first group, pull out identical bins from second group, and choose best") os.system("mkdir "+sys.argv[5]) new_summary_file=bins_1_summary["header"] bins_2_matches={} @@ -154,7 +155,7 @@ os.system(cmd) bin_ct+=1 -print "retrieve bins from second group that were not found in first group" +print("retrieve bins from second group that were not found in first group") for bin_2 in bins_2_stats: if bins_2_stats[bin_2][0]x: continue if bin_2 in bins_2_matches: continue @@ -167,6 +168,6 @@ f = open(sys.argv[5]+".stats", 'w') f.write(new_summary_file) -print "There were " + str(bin_ct) + " bins cherry-picked from the original sets!" +print("There were " + str(bin_ct) + " bins cherry-picked from the original sets!") diff --git a/bin/metawrap-scripts/dereplicate_contigs_in_bins.py b/bin/metawrap-scripts/dereplicate_contigs_in_bins.py index c1b2191..0371b7b 100755 --- a/bin/metawrap-scripts/dereplicate_contigs_in_bins.py +++ b/bin/metawrap-scripts/dereplicate_contigs_in_bins.py @@ -1,10 +1,11 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys, os # Usage: ./script.py bins.stats binsFolder outFolder # load in bin completion and contamination scores -print "Loading in bin completion and contamination scores..." +print("Loading in bin completion and contamination scores...") bin_scores={} for line in open(sys.argv[1]): if "completeness" in line: continue @@ -13,7 +14,7 @@ bin_scores[cut[0]]=score # load in contigs in each bin -print "Loading in contigs in each bin..." +print("Loading in contigs in each bin...") contig_mapping={} for bin_file in os.listdir(sys.argv[2]): bin_name=".".join(bin_file.split("/")[-1].split(".")[:-1]) @@ -29,7 +30,7 @@ # go over the bin files again and make a new dereplicated version of each bin file -print "Making a new dereplicated version of each bin file" +print("Making a new dereplicated version of each bin file") os.system("mkdir "+sys.argv[3]) for bin_file in os.listdir(sys.argv[2]): bin_name=".".join(bin_file.split("/")[-1].split(".")[:-1]) diff --git a/bin/metawrap-scripts/filter_nanopore_reads_for_bin_reassembly.py b/bin/metawrap-scripts/filter_nanopore_reads_for_bin_reassembly.py index 04599d2..0a31550 100755 --- a/bin/metawrap-scripts/filter_nanopore_reads_for_bin_reassembly.py +++ b/bin/metawrap-scripts/filter_nanopore_reads_for_bin_reassembly.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys, os complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'a':'t', 't':'a', 'c':'g', 'g':'c', 'N':'N', 'n':'n', '*':'*'} @@ -9,7 +10,7 @@ def rev_comp(seq): return rev_comp[::-1] # load bin contigs -print "loading contig to bin mappings..." +print("loading contig to bin mappings...") contig_bins={} for bin_file in os.listdir(sys.argv[1]): if bin_file.endswith(".fa") or bin_file.endswith(".fasta"): @@ -21,7 +22,7 @@ def rev_comp(seq): # store the read names and what bins they belong in in these dictionaries # strict stores only perfectly aligning reads and permissive stores any aligned reads -print "Parsing sam file and writing reads to appropriate files depending what bin they alligned to..." +print("Parsing sam file and writing reads to appropriate files depending what bin they alligned to...") files={} opened_bins={} for line in sys.stdin: @@ -60,9 +61,9 @@ def rev_comp(seq): files[sys.argv[2]+"/"+bin_name+".nanopore.fastq"].write('@' + cut[0] + "/1" + "\n" + cut[9] + "\n+\n" + cut[10] + "\n") -print "closing files" +print("closing files") for f in files: files[f].close() -print "Finished splitting reads!" +print("Finished splitting reads!") diff --git a/bin/metawrap-scripts/filter_out_fastq_reads.py b/bin/metawrap-scripts/filter_out_fastq_reads.py index 1e936dc..6f59cdf 100755 --- a/bin/metawrap-scripts/filter_out_fastq_reads.py +++ b/bin/metawrap-scripts/filter_out_fastq_reads.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys # this script takes in a list of fastq read names and picks them out of a .fastq file. @@ -20,7 +21,7 @@ # if soi in line: found=True if found==True: - print line + print(line) sys.stdout.flush() diff --git a/bin/metawrap-scripts/filter_reads_for_bin_reassembly.py b/bin/metawrap-scripts/filter_reads_for_bin_reassembly.py index 192d744..9b336bb 100755 --- a/bin/metawrap-scripts/filter_reads_for_bin_reassembly.py +++ b/bin/metawrap-scripts/filter_reads_for_bin_reassembly.py @@ -1,6 +1,7 @@ #!/usr/bin/env python2.7 #usage: # bwa mem -a assembly.fa reads_1.fastq reads_2.fastq | ./filter_reads_for_bin_reassembly.py original_bin_folder reads_1.fastq reads_2.fastq output_dir +from __future__ import print_function import sys, os strict_snp_cutoff = int(sys.argv[3]) permissive_snp_cutoff = int(sys.argv[4]) @@ -13,7 +14,7 @@ def rev_comp(seq): return rev_comp[::-1] # load bin contigs -print "loading contig to bin mappings..." +print("loading contig to bin mappings...") contig_bins={} for bin_file in os.listdir(sys.argv[1]): if bin_file.endswith(".fa") or bin_file.endswith(".fasta"): @@ -25,7 +26,7 @@ def rev_comp(seq): # store the read names and what bins they belong in in these dictionaries # strict stores only perfectly aligning reads and permissive stores any aligned reads -print "Parsing sam file and writing reads to appropriate files depending what bin they alligned to..." +print("Parsing sam file and writing reads to appropriate files depending what bin they alligned to...") files={} opened_bins={} for line in sys.stdin: @@ -106,11 +107,11 @@ def rev_comp(seq): files[sys.argv[2]+"/"+bin_name+".permissive_2.fastq"].write('@' + R_cut[0] + "/2" + "\n" + R_cut[9] + "\n+\n" + R_cut[10] + "\n") -print "closing files" +print("closing files") for f in files: files[f].close() -print "Finished splitting reads!" +print("Finished splitting reads!") diff --git a/bin/metawrap-scripts/fix_config_naming.py b/bin/metawrap-scripts/fix_config_naming.py index 1ea9688..b1733d6 100755 --- a/bin/metawrap-scripts/fix_config_naming.py +++ b/bin/metawrap-scripts/fix_config_naming.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys for line in open(sys.argv[1]): @@ -7,4 +8,4 @@ if c=="=": c="_" sys.stdout.write(c) else: - print line.rstrip() + print(line.rstrip()) diff --git a/bin/metawrap-scripts/fix_megahit_contig_naming.py b/bin/metawrap-scripts/fix_megahit_contig_naming.py index 432adc1..0104c8d 100755 --- a/bin/metawrap-scripts/fix_megahit_contig_naming.py +++ b/bin/metawrap-scripts/fix_megahit_contig_naming.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys import textwrap @@ -25,5 +26,5 @@ for k in sorted(dic, key=lambda k: len(dic[k]), reverse=True): - print k - print textwrap.fill(dic[k], 100, break_on_hyphens = False) + print(k) + print(textwrap.fill(dic[k], 100, break_on_hyphens = False)) diff --git a/bin/metawrap-scripts/gather-counts.py b/bin/metawrap-scripts/gather-counts.py index f454ff9..db1c7f1 100755 --- a/bin/metawrap-scripts/gather-counts.py +++ b/bin/metawrap-scripts/gather-counts.py @@ -11,6 +11,7 @@ C. Titus Brown, 11/2015 """ +from __future__ import print_function import os, os.path import sys import csv @@ -19,9 +20,9 @@ def process_quant_file(root, filename, outname): """ Convert individual quant.sf files into .counts files (transcripts\tcount). """ - print >>sys.stderr, 'Loading counts from:', root, filename + print('Loading counts from:', root, filename, file=sys.stderr) outfp = open(outname, 'w') - print >>outfp, "transcript\tcount" + print("transcript\tcount", file=outfp) d = {} full_file = os.path.join(root, filename) @@ -30,7 +31,7 @@ def process_quant_file(root, filename, outname): continue name, length, eff_length, tpm, count = line.strip().split('\t') - print >>outfp, "%s\t%s" % (name, float(tpm)) + print("%s\t%s" % (name, float(tpm)), file=outfp) def main(): @@ -43,7 +44,7 @@ def main(): quantlist = [] start_dir = '.' - print >>sys.stderr, 'Starting in:', os.path.abspath(start_dir) + print('Starting in:', os.path.abspath(start_dir), file=sys.stderr) for root, dirs, files in os.walk('.'): for filename in files: if filename.endswith('quant.sf'): @@ -54,7 +55,7 @@ def main(): break - print ",\n".join([ "\"%s\"" % i for i in sorted(quantlist)]) + print(",\n".join([ "\"%s\"" % i for i in sorted(quantlist)])) if __name__ == '__main__': main() diff --git a/bin/metawrap-scripts/interleave_fastq.py b/bin/metawrap-scripts/interleave_fastq.py index 73d0ba4..20cc51f 100755 --- a/bin/metawrap-scripts/interleave_fastq.py +++ b/bin/metawrap-scripts/interleave_fastq.py @@ -5,6 +5,7 @@ Usage: interleave-fasta fasta_file1 fasta_file2 """ +from __future__ import print_function import sys @@ -15,20 +16,20 @@ def interleave(f1, f2): line = f1.readline() if line.strip() == "": break - print line.strip() + print(line.strip()) for i in xrange(3): - print f1.readline().strip() + print(f1.readline().strip()) for i in xrange(4): - print f2.readline().strip() + print(f2.readline().strip()) if __name__ == '__main__': try: file1 = sys.argv[1] file2 = sys.argv[2] except: - print __doc__ + print(__doc__) sys.exit(1) if file1[-2:] == "gz": diff --git a/bin/metawrap-scripts/kraken2_translate.py b/bin/metawrap-scripts/kraken2_translate.py index 518770f..19897de 100755 --- a/bin/metawrap-scripts/kraken2_translate.py +++ b/bin/metawrap-scripts/kraken2_translate.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +from __future__ import print_function import sys import os diff --git a/bin/metawrap-scripts/kraken_to_krona.py b/bin/metawrap-scripts/kraken_to_krona.py index a79669f..16c4b40 100755 --- a/bin/metawrap-scripts/kraken_to_krona.py +++ b/bin/metawrap-scripts/kraken_to_krona.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys # This script takes in a translated kraken file of either contigs (from SPAdes) or reads, and parses it into a format for ktImportText to produce a kronachart. data={} @@ -24,5 +25,5 @@ for tax in data: - print str(data[tax]) + "\t" + tax + print(str(data[tax]) + "\t" + tax) diff --git a/bin/metawrap-scripts/make_bin_table.py b/bin/metawrap-scripts/make_bin_table.py index a5cdb85..99f51b2 100755 --- a/bin/metawrap-scripts/make_bin_table.py +++ b/bin/metawrap-scripts/make_bin_table.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys, os @@ -6,5 +7,5 @@ bin_name=".".join(filename.split(".")[:-1]) for line in open(sys.argv[1]+'/'+filename): if line[0]=='>': - print line[1:-1]+'\t'+bin_name + print(line[1:-1]+'\t'+bin_name) diff --git a/bin/metawrap-scripts/make_heatmap.py b/bin/metawrap-scripts/make_heatmap.py index cc97be2..508a018 100755 --- a/bin/metawrap-scripts/make_heatmap.py +++ b/bin/metawrap-scripts/make_heatmap.py @@ -1,5 +1,6 @@ #!/usr/bin/env python2.7 -print "loading libs..." +from __future__ import print_function +print("loading libs...") import sys import numpy as np import pandas as pd @@ -12,7 +13,7 @@ def load_lib_sizes(filename): - print "loading library sizes..." + print("loading library sizes...") libs={} for line in open(filename): if line.startswith("#"): continue @@ -23,7 +24,7 @@ def load_lib_sizes(filename): def load_data(filename): - print "loading abundance data..." + print("loading abundance data...") df=pd.read_csv(filename, sep='\t', index_col=0) # remove all 0 rows @@ -36,7 +37,7 @@ def load_data(filename): def set_colors_to_timeline(df): - print "adding colored labels..." + print("adding colored labels...") lut=[] for sample in df.columns.values: if "2013-04" in sample: lut.append('m') @@ -50,7 +51,7 @@ def set_colors_to_timeline(df): def draw_clustermap(df, lut): - print "drawing clustermap..." + print("drawing clustermap...") sns.set(font_scale=1) df = df.fillna(0) if lut!=False: diff --git a/bin/metawrap-scripts/parse_read_mappings.py b/bin/metawrap-scripts/parse_read_mappings.py index 853d4dc..74ed938 100755 --- a/bin/metawrap-scripts/parse_read_mappings.py +++ b/bin/metawrap-scripts/parse_read_mappings.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys, os @@ -25,5 +26,5 @@ if line.split("\t")[11]=="NM:i:0": mappings[read][0][contig_bins[contig]]=None mappings[read][1][contig_bins[contig]]=None -print mappings +print(mappings) diff --git a/bin/metawrap-scripts/plot_binning_results.py b/bin/metawrap-scripts/plot_binning_results.py index 13619d7..b920094 100755 --- a/bin/metawrap-scripts/plot_binning_results.py +++ b/bin/metawrap-scripts/plot_binning_results.py @@ -2,6 +2,7 @@ # USAGE: # ./script file1.stats file2.stats file3.stats +from __future__ import print_function import sys import matplotlib.pyplot as plt plt.switch_backend('agg') @@ -12,12 +13,12 @@ #################################################################################################################################### ############################################ MAKE THE COMPLETION PLOT ############################################ #################################################################################################################################### -print "Loading completion info...." +print("Loading completion info....") data={} max_x=0 # loop over all bin .stats files for file_name in sys.argv[3:]: - print file_name + print(file_name) bin_set=".".join(file_name.split("/")[-1].split(".")[:-1]) data[bin_set]=[] for line in open(file_name): @@ -36,7 +37,7 @@ for bin_set in data: data[bin_set].sort(reverse=True) -print "Plotting completion data..." +print("Plotting completion data...") # MAKING THE PLOT PRETTY!!!! # set some color schemes tableau20 = [(214, 39, 40), (31, 119, 180), (255, 127, 14), @@ -130,7 +131,7 @@ #################################################################################################################################### ############################################ MAKE THE CONTAMINATION PLOT ############################################ #################################################################################################################################### -print "Loading contamination info..." +print("Loading contamination info...") data={} # loop over all bin .stats files @@ -152,7 +153,7 @@ for bin_set in data: data[bin_set].sort(reverse=False) -print "Plotting the contamination data..." +print("Plotting the contamination data...") # MAKING THE PLOT PRETTY!!!! # Remove the plot frame lines. They are unnecessary chartjunk. ax = plt.subplot(122) @@ -224,7 +225,7 @@ plt.gcf().subplots_adjust(right=0.9) # save figure -print "Saving figures binning_results.eps and binning_results.png ..." +print("Saving figures binning_results.eps and binning_results.png ...") plt.tight_layout(w_pad=10) plt.subplots_adjust(top=0.92, right=0.90, left=0.08) plt.savefig("binning_results.png",format='png', dpi=300) diff --git a/bin/metawrap-scripts/plot_reassembly.py b/bin/metawrap-scripts/plot_reassembly.py index b346522..d454c4f 100755 --- a/bin/metawrap-scripts/plot_reassembly.py +++ b/bin/metawrap-scripts/plot_reassembly.py @@ -2,6 +2,7 @@ # USAGE: # ./script file1.stats file2.stats file3.stats +from __future__ import print_function import sys import matplotlib.pyplot as plt plt.switch_backend('agg') @@ -13,7 +14,7 @@ #################################################################################################################################### ############################################ MAKE THE N50 PLOT ############################################ #################################################################################################################################### -print "Loading completion info...." +print("Loading completion info....") data={} max_n50=0 # loop over all bin .stats files @@ -35,7 +36,7 @@ for bin_set in data: data[bin_set].sort(reverse=True) -print "Plotting completion data..." +print("Plotting completion data...") # MAKING THE PLOT PRETTY!!!! # set some color schemes tableau20 = [(214, 39, 40), (31, 119, 180), (255, 127, 14), @@ -120,7 +121,7 @@ #################################################################################################################################### ############################################ MAKE THE COMPLETION PLOT ############################################ #################################################################################################################################### -print "Loading completion info...." +print("Loading completion info....") data={} max_x=0 # loop over all bin .stats files @@ -144,7 +145,7 @@ for bin_set in data: data[bin_set].sort(reverse=True) -print "Plotting completion data..." +print("Plotting completion data...") # set figure size plt.style.use('ggplot') @@ -224,7 +225,7 @@ #################################################################################################################################### ############################################ MAKE THE CONTAMINATION PLOT ############################################ #################################################################################################################################### -print "Loading contamination info..." +print("Loading contamination info...") data={} # loop over all bin .stats files @@ -246,7 +247,7 @@ for bin_set in data: data[bin_set].sort(reverse=False) -print "Plotting the contamination data..." +print("Plotting the contamination data...") # MAKING THE PLOT PRETTY!!!! # Remove the plot frame lines. They are unnecessary chartjunk. ax = plt.subplot(133) @@ -313,7 +314,7 @@ # save figure -print "Saving figures reassembly_results.eps and reassembly_results.png to folder "+sys.argv[1] +print("Saving figures reassembly_results.eps and reassembly_results.png to folder "+sys.argv[1]) plt.tight_layout(w_pad=5) #plt.subplots_adjust(top=0.92, right=0.90, left=0.08) plt.savefig(sys.argv[1]+'/'+"reassembly_results.eps",format='eps', dpi=600) diff --git a/bin/metawrap-scripts/print_comment.py b/bin/metawrap-scripts/print_comment.py index dad56a6..3ee9b8d 100755 --- a/bin/metawrap-scripts/print_comment.py +++ b/bin/metawrap-scripts/print_comment.py @@ -1,10 +1,11 @@ #!/usr/bin/env python2.7 # This script prints any comment in a structured and prety way. +from __future__ import print_function import sys comm=sys.argv[1] delim=sys.argv[2] -print '\n'+delim*120 +print('\n'+delim*120) max_len=90 @@ -14,15 +15,15 @@ if (len(line) + 1 + len(word))>max_len: edge1=(120-len(line))/2 - 5 edge2=120-edge1-len(line) - 10 - print delim*5 + " "*edge1 + line + " "*edge2 + delim*5 + print(delim*5 + " "*edge1 + line + " "*edge2 + delim*5) line=word else: line = line+" "+word edge1=(120-len(line))/2 - 5 edge2=120-edge1-len(line) - 10 -print delim*5 + " "*edge1 + line + " "*edge2 + delim*5 +print(delim*5 + " "*edge1 + line + " "*edge2 + delim*5) -print delim*120+'\n' +print(delim*120+'\n') diff --git a/bin/metawrap-scripts/prune_blast_hits.py b/bin/metawrap-scripts/prune_blast_hits.py index 47b4764..9403d7a 100755 --- a/bin/metawrap-scripts/prune_blast_hits.py +++ b/bin/metawrap-scripts/prune_blast_hits.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys #load in nodes.dmp @@ -25,4 +26,4 @@ ct+=1 #print "\t".join(cut[:5] + cut[6:]) #print "\t".join(cut[4:6]) - print "\t".join(cut) + print("\t".join(cut)) diff --git a/bin/metawrap-scripts/rm_short_contigs.py b/bin/metawrap-scripts/rm_short_contigs.py index f30c137..744ccf8 100755 --- a/bin/metawrap-scripts/rm_short_contigs.py +++ b/bin/metawrap-scripts/rm_short_contigs.py @@ -1,9 +1,10 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys for line in open(sys.argv[2]): - if not line.startswith(">"): print line.strip() + if not line.startswith(">"): print(line.strip()) else: if int(line.split("_")[3])"+cut[0] - print cut[9] - print "+" - print cut[10] + print(">"+cut[0]) + print(cut[9]) + print("+") + print(cut[10]) diff --git a/bin/metawrap-scripts/select_human_reads.py b/bin/metawrap-scripts/select_human_reads.py index 23c9746..fde3b81 100755 --- a/bin/metawrap-scripts/select_human_reads.py +++ b/bin/metawrap-scripts/select_human_reads.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys # This script takes in the reads that are proposed by bmtagger to be human, and filteres them out of the original fastq file. @@ -16,7 +17,7 @@ if line[1:].split("/")[0].split()[0] in human: skip=False else: skip=True - if skip==False: print line.rstrip() + if skip==False: print(line.rstrip()) diff --git a/bin/metawrap-scripts/shorten_contig_names.py b/bin/metawrap-scripts/shorten_contig_names.py index fa6a4d8..ac13b20 100755 --- a/bin/metawrap-scripts/shorten_contig_names.py +++ b/bin/metawrap-scripts/shorten_contig_names.py @@ -1,17 +1,18 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys shorten=False for line in open(sys.argv[1]): if line[0]!=">": - print line.rstrip() + print(line.rstrip()) else: if shorten==True: - print "_".join(line.rstrip().split("_")[:4]) + print("_".join(line.rstrip().split("_")[:4])) elif len(line)>20 and len(line.split("_"))>5: - print "_".join(line.rstrip().split("_")[:4]) + print("_".join(line.rstrip().split("_")[:4])) shorten=True else: - print line.rstrip() + print(line.rstrip()) diff --git a/bin/metawrap-scripts/skip_human_reads.py b/bin/metawrap-scripts/skip_human_reads.py index 9cb23eb..0cf884d 100755 --- a/bin/metawrap-scripts/skip_human_reads.py +++ b/bin/metawrap-scripts/skip_human_reads.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys # This script takes in the reads that are probosed by bmtagger to be human, and filteres them out of the original fastq file. @@ -16,5 +17,5 @@ if line[1:].split("/")[0].split()[0] in human: skip=True else: skip=False - if skip==False: print line.rstrip() + if skip==False: print(line.rstrip()) diff --git a/bin/metawrap-scripts/sort_contigs.py b/bin/metawrap-scripts/sort_contigs.py index 158154a..9c16fc7 100755 --- a/bin/metawrap-scripts/sort_contigs.py +++ b/bin/metawrap-scripts/sort_contigs.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys import textwrap @@ -15,5 +16,5 @@ for k in sorted(dic, key=lambda k: len(dic[k]), reverse=True): - print k - print textwrap.fill(dic[k], 100, break_on_hyphens = False) + print(k) + print(textwrap.fill(dic[k], 100, break_on_hyphens = False)) diff --git a/bin/metawrap-scripts/split_concoct_bins.py b/bin/metawrap-scripts/split_concoct_bins.py index 585d250..9c8af49 100755 --- a/bin/metawrap-scripts/split_concoct_bins.py +++ b/bin/metawrap-scripts/split_concoct_bins.py @@ -3,9 +3,10 @@ # Usage: # ./script clustering_gt1000.csv assembly_file.fa out_folder +from __future__ import print_function import sys, os -print "Loading in the bins that the contigs belong to..." +print("Loading in the bins that the contigs belong to...") bins={} for line in open(sys.argv[1]): if line.startswith("contig_id"): @@ -13,7 +14,7 @@ bins[line.strip().split(",")[0].split(".")[0]] = line.strip().split(",")[1] -print "Going through the entire assembly and splitting contigs into their respective bin file..." +print("Going through the entire assembly and splitting contigs into their respective bin file...") current_bin="" for line in open(sys.argv[2]): if line.startswith(">"): @@ -27,7 +28,7 @@ current_bin="unbinned.fa" f = open(sys.argv[3]+"/"+current_bin,'a') f.write(line) -print "Done!" +print("Done!") diff --git a/bin/metawrap-scripts/split_reads_into_bins.py b/bin/metawrap-scripts/split_reads_into_bins.py index 00a7ec9..27a6fb5 100755 --- a/bin/metawrap-scripts/split_reads_into_bins.py +++ b/bin/metawrap-scripts/split_reads_into_bins.py @@ -1,12 +1,13 @@ #!/usr/bin/env python2.7 # Usage: ./script mapping_file.dict reads_1.fastq reads_2.fastq output_dir +from __future__ import print_function import sys, os -print "\nLoading in dictionary containing bin mapping of all the reads...\n" +print("\nLoading in dictionary containing bin mapping of all the reads...\n") mapping=eval(open(sys.argv[1]).readline()) -print "\n\nParsing forward reads and splitting them into files..." +print("\n\nParsing forward reads and splitting them into files...") ct=3 line1="" line2="" @@ -27,7 +28,7 @@ if read in mapping: for bin_name in mapping[read][0]: if bin_name+"_strict" not in opened_files: - print "Opening file "+sys.argv[4]+'/'+bin_name+".strict_1.fastq" + print("Opening file "+sys.argv[4]+'/'+bin_name+".strict_1.fastq") opened_files[bin_name+"_strict"] = open(sys.argv[4]+"/"+bin_name+".strict_1.fastq", 'w') opened_files[bin_name+"_strict"].write(line0) opened_files[bin_name+"_strict"].write(line1) @@ -36,17 +37,17 @@ for bin_name in mapping[read][1]: if bin_name+"permissive" not in opened_files: - print "Opening file "+sys.argv[4]+'/'+bin_name+".permissive_1.fastq" + print("Opening file "+sys.argv[4]+'/'+bin_name+".permissive_1.fastq") opened_files[bin_name+"permissive"] = open(sys.argv[4]+"/"+bin_name+".permissive_1.fastq", 'w') opened_files[bin_name+"permissive"].write(line0) opened_files[bin_name+"permissive"].write(line1) opened_files[bin_name+"permissive"].write(line2) opened_files[bin_name+"permissive"].write(line3) -print "Closing bin fastq files" +print("Closing bin fastq files") for f in opened_files: opened_files[f].close() -print "\n\nParsing reverse reads and splitting them into files..." +print("\n\nParsing reverse reads and splitting them into files...") ct=3 line1="" line2="" @@ -67,7 +68,7 @@ if read in mapping: for bin_name in mapping[read][0]: if bin_name+"_strict" not in opened_files: - print "Opening file "+sys.argv[4]+'/'+bin_name+".strict_2.fastq" + print("Opening file "+sys.argv[4]+'/'+bin_name+".strict_2.fastq") opened_files[bin_name+"_strict"] = open(sys.argv[4]+"/"+bin_name+".strict_2.fastq", 'w') opened_files[bin_name+"_strict"].write(line0) opened_files[bin_name+"_strict"].write(line1) @@ -76,14 +77,14 @@ for bin_name in mapping[read][1]: if bin_name+"permissive" not in opened_files: - print "Opening file "+sys.argv[4]+'/'+bin_name+".permissive_2.fastq" + print("Opening file "+sys.argv[4]+'/'+bin_name+".permissive_2.fastq") opened_files[bin_name+"permissive"] = open(sys.argv[4]+"/"+bin_name+".permissive_2.fastq", 'w') opened_files[bin_name+"permissive"].write(line0) opened_files[bin_name+"permissive"].write(line1) opened_files[bin_name+"permissive"].write(line2) opened_files[bin_name+"permissive"].write(line3) -print "Closing bin fastq files" +print("Closing bin fastq files") for f in opened_files: opened_files[f].close() diff --git a/bin/metawrap-scripts/split_salmon_out_into_bins.py b/bin/metawrap-scripts/split_salmon_out_into_bins.py index 6727c42..9b7e8c1 100755 --- a/bin/metawrap-scripts/split_salmon_out_into_bins.py +++ b/bin/metawrap-scripts/split_salmon_out_into_bins.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys, os import numpy as np @@ -70,12 +71,12 @@ sys.stdout.write('Genomic bins') for sample in bin_abundances[bin]["samples"]: sys.stdout.write('\t'+sample) - print "" + print("") first=False sys.stdout.write('.'.join(bin.split('.')[:-1])) for sample in bin_abundances[bin]["samples"]: sys.stdout.write("\t" + str(bin_abundances[bin]["samples"][sample])) - print "" + print("") diff --git a/bin/metawrap-scripts/summarize_checkm.py b/bin/metawrap-scripts/summarize_checkm.py index a9721f1..8814a36 100755 --- a/bin/metawrap-scripts/summarize_checkm.py +++ b/bin/metawrap-scripts/summarize_checkm.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2.7 +from __future__ import print_function import sys # This script summarizes the statistics of each bin by parsing # the checkm_folder/storage/bin_stats_ext.tsv file of the CheckM output @@ -6,15 +7,15 @@ if len(sys.argv)==3: binner=sys.argv[2] - print "bin\tcompleteness\tcontamination\tGC\tlineage\tN50\tsize\tbinner" + print("bin\tcompleteness\tcontamination\tGC\tlineage\tN50\tsize\tbinner") elif len(sys.argv)==4: source={} for line in open(sys.argv[3]): cut=line.strip().split("\t") source[cut[0]]=cut[7] - print "bin\tcompleteness\tcontamination\tGC\tlineage\tN50\tsize\tbinner" + print("bin\tcompleteness\tcontamination\tGC\tlineage\tN50\tsize\tbinner") else: - print "bin\tcompleteness\tcontamination\tGC\tlineage\tN50\tsize" + print("bin\tcompleteness\tcontamination\tGC\tlineage\tN50\tsize") for line in open(sys.argv[1]): @@ -27,19 +28,19 @@ if len(sys.argv)==3: - print "\t".join([name, str(dic["Completeness"])[:5],\ + print("\t".join([name, str(dic["Completeness"])[:5],\ str(dic["Contamination"])[:5], str(dic["GC"])[:5],\ dic["marker lineage"], str(dic["N50 (contigs)"]),\ - str(dic["Genome size"]), binner]) + str(dic["Genome size"]), binner])) elif len(sys.argv)==4: - print "\t".join([name, str(dic["Completeness"])[:5],\ + print("\t".join([name, str(dic["Completeness"])[:5],\ str(dic["Contamination"])[:5], str(dic["GC"])[:5],\ dic["marker lineage"], str(dic["N50 (contigs)"]),\ - str(dic["Genome size"]), source[name]]) + str(dic["Genome size"]), source[name]])) else: - print "\t".join([name, str(dic["Completeness"])[:5],\ + print("\t".join([name, str(dic["Completeness"])[:5],\ str(dic["Contamination"])[:5], str(dic["GC"])[:5],\ dic["marker lineage"], str(dic["N50 (contigs)"]),\ - str(dic["Genome size"])]) + str(dic["Genome size"])])) diff --git a/bin/metawrap-scripts/summarize_salmon_files.py b/bin/metawrap-scripts/summarize_salmon_files.py index f454ff9..db1c7f1 100755 --- a/bin/metawrap-scripts/summarize_salmon_files.py +++ b/bin/metawrap-scripts/summarize_salmon_files.py @@ -11,6 +11,7 @@ C. Titus Brown, 11/2015 """ +from __future__ import print_function import os, os.path import sys import csv @@ -19,9 +20,9 @@ def process_quant_file(root, filename, outname): """ Convert individual quant.sf files into .counts files (transcripts\tcount). """ - print >>sys.stderr, 'Loading counts from:', root, filename + print('Loading counts from:', root, filename, file=sys.stderr) outfp = open(outname, 'w') - print >>outfp, "transcript\tcount" + print("transcript\tcount", file=outfp) d = {} full_file = os.path.join(root, filename) @@ -30,7 +31,7 @@ def process_quant_file(root, filename, outname): continue name, length, eff_length, tpm, count = line.strip().split('\t') - print >>outfp, "%s\t%s" % (name, float(tpm)) + print("%s\t%s" % (name, float(tpm)), file=outfp) def main(): @@ -43,7 +44,7 @@ def main(): quantlist = [] start_dir = '.' - print >>sys.stderr, 'Starting in:', os.path.abspath(start_dir) + print('Starting in:', os.path.abspath(start_dir), file=sys.stderr) for root, dirs, files in os.walk('.'): for filename in files: if filename.endswith('quant.sf'): @@ -54,7 +55,7 @@ def main(): break - print ",\n".join([ "\"%s\"" % i for i in sorted(quantlist)]) + print(",\n".join([ "\"%s\"" % i for i in sorted(quantlist)])) if __name__ == '__main__': main()