From 6f68c67a609ee0ee720f7c6e197fe37ecd940b57 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Mon, 1 Apr 2019 22:56:22 +0200 Subject: [PATCH 1/3] Develop (#827) * Merged into the wrong branch without noticing :( (#814) * use better conda link (#799) * Estimated filtering fix (#813) * oops * fix testing and set a max number of filtered reads * apparently a bunch of things were getting skipped * fix wrappers * update computeMatrix wrapper * Decrease memory needs (#817) * Use an iterator to not blow memory up * Update a bit more * The GC bias stuff is all deprecated, I'm not fixing that old code * Cache resulting counts rather than just decreasing the bin size (#818) * Cache resulting counts rather than just decreasing the bin size * sanity check * Implement #815 * [skip ci] update change log * Implement #816 (#825) * Implement #816 * expose option * Add a test using pseudocounts and skipZeroOverZero * syntax * Fix tests * Make --skipZeroOverZero a galaxy macro and add to bigwigCompare * [ci skip] a bit of formatting * Fix #822 (#826) --- CHANGES.txt | 3 ++ deeptools/_version.py | 2 +- deeptools/bamCoverage.py | 2 + deeptools/bigwigCompare.py | 7 +++ deeptools/correctGCBias.py | 6 +++ deeptools/countReadsPerBin.py | 9 ++-- deeptools/getScaleFactor.py | 23 ++++++---- deeptools/plotProfile.py | 46 +++++++++---------- deeptools/sumCoveragePerBin.py | 9 ++-- ...st_bigwigCompare_and_multiBigwigSummary.py | 12 +++++ deeptools/writeBedGraph_bam_and_bw.py | 10 ++-- galaxy/wrapper/bamCompare.xml | 6 +-- galaxy/wrapper/bigwigCompare.xml | 2 + galaxy/wrapper/deepTools_macros.xml | 12 ++++- setup.py | 4 +- 15 files changed, 97 insertions(+), 56 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index b08c98514..97cca145b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,9 @@ 3.2.1 * Changed a bug in `estimateReadFiltering` where the estimated number of filtered reads was typically too low. + * Made an internal change that should drastically reduce the memory requirements of many tools. This slightly increases run time, but as the resulting resource usage is much more attractive this is judged worthwhile. + * An informative error message is now produced with `bamCoverage` if RPGC normalization is requested but no effective genome size is provided (issue #815). + * Fixes some issues with y-axis scaling (issue #822) 3.2.0 diff --git a/deeptools/_version.py b/deeptools/_version.py index 89f024fc3..1d0891b80 100644 --- a/deeptools/_version.py +++ b/deeptools/_version.py @@ -2,4 +2,4 @@ # This file is originally generated from Git information by running 'setup.py # version'. Distribution tarballs contain a pre-generated copy of this file. -__version__ = '3.2.0' +__version__ = '3.2.1' diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py index 169da1822..c97cadf57 100644 --- a/deeptools/bamCoverage.py +++ b/deeptools/bamCoverage.py @@ -152,6 +152,8 @@ def main(args=None): if args.normalizeUsing == 'None': args.normalizeUsing = None # For the sake of sanity + elif args.normalizeUsing == 'RPGC' and not args.effectiveGenomeSize: + sys.exit("RPGC normalization requires an --effectiveGenomeSize!\n") if args.normalizeUsing: # if a normalization is required then compute the scale factors diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index bfce8a80c..80738840c 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -61,6 +61,12 @@ def parse_arguments(args=None): type=float, required=False) + parser.add_argument('--skipZeroOverZero', + help='Skip bins where BOTH BAM files lack coverage. ' + 'This is determined BEFORE any applicable pseudocount ' + 'is added.', + action='store_true') + parser.add_argument('--operation', help='The default is to output the log2ratio of the ' 'two samples. The reciprocal ratio returns the ' @@ -159,6 +165,7 @@ def main(args=None): blackListFileName=args.blackListFileName, verbose=args.verbose, numberOfProcessors=args.numberOfProcessors, + skipZeroOverZero=args.skipZeroOverZero, format=args.outFileFormat, smoothLength=False, missingDataAsZero=not args.skipNonCoveredRegions, diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py index 28ab27533..5ac1ad8b7 100644 --- a/deeptools/correctGCBias.py +++ b/deeptools/correctGCBias.py @@ -195,8 +195,11 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): if r.flag & 4 == 0] bam.close() + r_index = -1 for read in reads: + if read.is_unmapped: + continue r_index += 1 try: # calculate GC content of read fragment @@ -340,6 +343,7 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end, matePairs = {} read_repetitions = 0 removed_duplicated_reads = 0 + # cache data # r.flag & 4 == 0 is to filter unmapped reads that # have a genomic position @@ -348,6 +352,8 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end, r_index = -1 for read in reads: + if read.pos <= start or read.is_unmapped: + continue r_index += 1 copies = None gc = None diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index 178dad250..62c3eedb2 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -603,16 +603,15 @@ def get_coverage_of_region(self, bamHandle, chrom, regions, start_time = time.time() # caching seems faster. TODO: profile the function c = 0 - if chrom in bamHandle.references: - reads = [r for r in bamHandle.fetch(chrom, regStart, regEnd) - if r.flag & 4 == 0] - else: + if chrom not in bamHandle.references: raise NameError("chromosome {} not found in bam file".format(chrom)) prev_pos = set() lpos = None # of previous processed read pair - for read in reads: + for read in bamHandle.fetch(chrom, regStart, regEnd): + if read.is_unmapped: + continue if self.minMappingQuality and read.mapq < self.minMappingQuality: continue diff --git a/deeptools/getScaleFactor.py b/deeptools/getScaleFactor.py index d218b59ab..541b4febd 100644 --- a/deeptools/getScaleFactor.py +++ b/deeptools/getScaleFactor.py @@ -14,15 +14,20 @@ def getFractionKept_wrapper(args): return getFractionKept_worker(*args) -def getFractionKept_worker(chrom, start, end, bamFile, args): +def getFractionKept_worker(chrom, start, end, bamFile, args, offset): """ Queries the BAM file and counts the number of alignments kept/found in the first 50000 bases. """ bam = bamHandler.openBam(bamFile) + start += offset * 50000 end = min(end, start + 50000) tot = 0 filtered = 0 + + if end <= start: + return (filtered, tot) + prev_pos = set() lpos = None if chrom in bam.references: @@ -151,13 +156,10 @@ def fraction_kept(args, stats): else: chrom_sizes = list(zip(bam_handle.references, bam_handle.lengths)) - while total < num_needed_to_sample and distanceBetweenBins > 50000: - # If we've iterated, then halve distanceBetweenBins - distanceBetweenBins /= 2 - if distanceBetweenBins < 50000: - distanceBetweenBins = 50000 - - res = mapReduce.mapReduce((bam_handle.filename, args), + offset = 0 + # Iterate over bins at various non-overlapping offsets until we have enough data + while total < num_needed_to_sample and offset < np.ceil(distanceBetweenBins / 50000): + res = mapReduce.mapReduce((bam_handle.filename, args, offset), getFractionKept_wrapper, chrom_sizes, genomeChunkLength=distanceBetweenBins, @@ -166,7 +168,10 @@ def fraction_kept(args, stats): verbose=args.verbose) if len(res): - filtered, total = np.sum(res, axis=0) + foo, bar = np.sum(res, axis=0) + filtered += foo + total += bar + offset += 1 if total == 0: # This should never happen diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 6e0beb9dd..d805a4e04 100644 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -690,6 +690,8 @@ def plot_profile(self): first = True ax_list = [] + globalYmin = np.inf + globalYmax = -np.inf for plot in range(self.numplots): localYMin = None localYMax = None @@ -698,7 +700,7 @@ def plot_profile(self): if (row == 0 and col == 0) or len(self.y_min) > 1 or len(self.y_max) > 1: ax = self.fig.add_subplot(self.grids[row, col]) else: - ax = self.fig.add_subplot(self.grids[row, col], sharey=ax_list[0]) + ax = self.fig.add_subplot(self.grids[row, col]) if self.per_group: title = self.hm.matrix.group_labels[plot] @@ -717,10 +719,10 @@ def plot_profile(self): _row, _col = plot, data_idx else: _row, _col = data_idx, plot - if localYMin is None or self.y_min[_col % len(self.y_min)] < localYMin: - localYMin = self.y_min[_col % len(self.y_min)] - if localYMax is None or self.y_max[_col % len(self.y_max)] > localYMax: - localYMax = self.y_max[_col % len(self.y_max)] + if localYMin is None or self.y_min[col % len(self.y_min)] < localYMin: + localYMin = self.y_min[col % len(self.y_min)] + if localYMax is None or self.y_max[col % len(self.y_max)] > localYMax: + localYMax = self.y_max[col % len(self.y_max)] sub_matrix = self.hm.matrix.get_matrix(_row, _col) @@ -738,6 +740,8 @@ def plot_profile(self): self.color_list[coloridx], label, plot_type=self.plot_type) + globalYmin = min(np.float64(globalYmin), ax.get_ylim()[0]) + globalYmax = max(globalYmax, ax.get_ylim()[1]) # remove the numbers of the y axis for all plots plt.setp(ax.get_yticklabels(), visible=False) @@ -747,12 +751,6 @@ def plot_profile(self): # on each row and make the numbers and ticks visible plt.setp(ax.get_yticklabels(), visible=True) ax.axes.set_ylabel(self.y_axis_label) - """ - # reduce the number of yticks by half - num_ticks = len(ax.get_yticks()) - yticks = [ax.get_yticks()[i] for i in range(1, num_ticks, 2)] - ax.set_yticks(yticks) - """ totalWidth = sub_matrix['matrix'].shape[1] xticks, xtickslabel = self.getTicks(tickIdx) @@ -776,23 +774,23 @@ def plot_profile(self): frameon=False, markerscale=0.5) if len(self.y_min) == 1 and len(self.y_max) == 1: first = False + ax_list.append(ax) - """ - ax.legend(bbox_to_anchor=(-0.05, -1.13, 1., 1), - loc='upper center', - ncol=1, mode="expand", prop=font_p, - frameon=False, markerscale=0.5) - """ - lims = ax.get_ylim() + # It turns out that set_ylim only takes np.float64s + for sample_id, subplot in enumerate(ax_list): + localYMin = self.y_min[sample_id % len(self.y_min)] + localYMax = self.y_max[sample_id % len(self.y_max)] + lims = [globalYmin, globalYmax] if localYMin is not None: - lims = (localYMin, lims[1]) - if localYMax is not None: - lims = (lims[0], localYMax) + if localYMax is not None: + lims = (np.float64(localYMin), np.float64(localYMax)) + else: + lims = (np.float64(localYMin), lims[1]) + elif localYMax is not None: + lims = (lims[0], np.float64(localYMax)) if lims[0] >= lims[1]: lims = (lims[0], lims[0] + 1) - ax.set_ylim(lims) - - ax_list.append(ax) + ax_list[sample_id].set_ylim(lims) plt.subplots_adjust(wspace=0.05, hspace=0.3) plt.tight_layout() diff --git a/deeptools/sumCoveragePerBin.py b/deeptools/sumCoveragePerBin.py index a5e347da0..705da7150 100644 --- a/deeptools/sumCoveragePerBin.py +++ b/deeptools/sumCoveragePerBin.py @@ -85,10 +85,7 @@ def get_coverage_of_region(self, bamHandle, chrom, regions, c = 0 try: # BAM input - if chrom in bamHandle.references: - reads = [r for r in bamHandle.fetch(chrom, regStart, regEnd) - if r.flag & 4 == 0] - else: + if chrom not in bamHandle.references: raise NameError("chromosome {} not found in bam file".format(chrom)) except: # bigWig input, as used by plotFingerprint @@ -104,7 +101,9 @@ def get_coverage_of_region(self, bamHandle, chrom, regions, prev_pos = set() lpos = None # of previous processed read pair - for read in reads: + for read in bamHandle.fetch(chrom, regStart, regEnd): + if read.is_unmapped: + continue if self.minMappingQuality and read.mapq < self.minMappingQuality: continue diff --git a/deeptools/test/test_bigwigCompare_and_multiBigwigSummary.py b/deeptools/test/test_bigwigCompare_and_multiBigwigSummary.py index 3cd05b940..831924277 100644 --- a/deeptools/test/test_bigwigCompare_and_multiBigwigSummary.py +++ b/deeptools/test/test_bigwigCompare_and_multiBigwigSummary.py @@ -62,6 +62,18 @@ def test_bigwigCompare_skipnas(): unlink(outfile) +def test_bigwigCompare_skipZeroOverZero(): + outfile = '/tmp/result.bg"' + args = "-b1 {} -b2 {} -o {} --skipZeroOverZero --pseudocount 1 3 --outFileFormat bedgraph".format(BIGWIG_A, BIGWIG_A, outfile).split() + bwComp.main(args) + _foo = open(outfile, 'r') + resp = _foo.readlines() + _foo.close() + expected = ['3R\t100\t200\t-1\n'] + assert resp == expected, "{} != {}".format(resp, expected) + unlink(outfile) + + def test_multiBigwigSummary(): outfile = '/tmp/result.bg' args = "bins -b {} {} --binSize 50 -o {}".format(BIGWIG_A, BIGWIG_B, outfile).split() diff --git a/deeptools/writeBedGraph_bam_and_bw.py b/deeptools/writeBedGraph_bam_and_bw.py index 67cfa9b43..000b6b288 100644 --- a/deeptools/writeBedGraph_bam_and_bw.py +++ b/deeptools/writeBedGraph_bam_and_bw.py @@ -45,7 +45,7 @@ def writeBedGraph_wrapper(args): def writeBedGraph_worker( chrom, start, end, tileSize, defaultFragmentLength, bamOrBwFileList, func, funcArgs, extendPairedEnds=True, smoothLength=0, - missingDataAsZero=False, fixed_step=False): + skipZeroOverZero=False, missingDataAsZero=False, fixed_step=False): r""" Writes a bedgraph having as base a number of bam files. @@ -102,6 +102,10 @@ def writeBedGraph_worker( "files. Remove this chromosome from the bigwig file " "to continue".format(chrom)) + if skipZeroOverZero and np.sum(tileCoverage) == 0: + previousValue = None + continue + value = func(tileCoverage, funcArgs) if fixed_step: @@ -147,7 +151,7 @@ def writeBedGraph( bamOrBwFileList, outputFileName, fragmentLength, func, funcArgs, tileSize=25, region=None, blackListFileName=None, numberOfProcessors=1, format="bedgraph", extendPairedEnds=True, missingDataAsZero=False, - smoothLength=0, fixed_step=False, verbose=False): + skipZeroOverZero=False, smoothLength=0, fixed_step=False, verbose=False): r""" Given a list of bamfiles, a function and a function arguments, this method writes a bedgraph file (or bigwig) file @@ -206,7 +210,7 @@ def writeBedGraph( res = mapReduce.mapReduce((tileSize, fragmentLength, bamOrBwFileList, func, funcArgs, extendPairedEnds, smoothLength, - missingDataAsZero, fixed_step), + skipZeroOverZero, missingDataAsZero, fixed_step), writeBedGraph_wrapper, chromNamesAndSize, genomeChunkLength=genomeChunkLength, diff --git a/galaxy/wrapper/bamCompare.xml b/galaxy/wrapper/bamCompare.xml index a63210c8f..4c5e45bb1 100644 --- a/galaxy/wrapper/bamCompare.xml +++ b/galaxy/wrapper/bamCompare.xml @@ -164,11 +164,7 @@ - - - - + + diff --git a/galaxy/wrapper/deepTools_macros.xml b/galaxy/wrapper/deepTools_macros.xml index f4c88d0f0..13b8fe809 100644 --- a/galaxy/wrapper/deepTools_macros.xml +++ b/galaxy/wrapper/deepTools_macros.xml @@ -1,10 +1,10 @@ --numberOfProcessors "\${GALAXY_SLOTS:-4}" - 3.2.0.0 + 3.2.1.0 - deeptools + deeptools samtools @@ -650,6 +650,14 @@ is vital to you, select Yes below."> (default: False)" /> + + + + + + + = 0.17.0", "matplotlib >= 2.1.2", "pysam >= 0.14.0", - "numpydoc >=0.5", - "pyBigWig >=0.2.1", + "numpydoc >= 0.5", + "pyBigWig >= 0.2.1", "py2bit >= 0.2.0", "plotly >= 2.0.0", "deeptoolsintervals >= 0.1.7" From 9092b83f7ab4d7d57409f0cd9046d573209e9a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= Date: Wed, 29 May 2019 14:05:51 +0200 Subject: [PATCH 2/3] fixes linting issues (#837) --- galaxy/wrapper/plotEnrichment.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/galaxy/wrapper/plotEnrichment.xml b/galaxy/wrapper/plotEnrichment.xml index e54f6daf1..20c22ccdf 100644 --- a/galaxy/wrapper/plotEnrichment.xml +++ b/galaxy/wrapper/plotEnrichment.xml @@ -29,7 +29,7 @@ #end if #if $advancedOpt.showAdvancedOpt == "yes" - #if $advancedOpt.attributeKey and str($advancedOpt.attributeKey).strip() != "": + #if $advancedOpt.attributeKey: --attributeKey '$advancedOpt.attributeKey' #end if @@ -88,9 +88,9 @@ - Date: Fri, 9 Aug 2019 20:50:19 +0200 Subject: [PATCH 3/3] Delete #test.bg# (#859) File is removed upon clean. --- deeptools/test/test_heatmapper/#test.bg# | 48 ------------------------ 1 file changed, 48 deletions(-) delete mode 100644 deeptools/test/test_heatmapper/#test.bg# diff --git a/deeptools/test/test_heatmapper/#test.bg# b/deeptools/test/test_heatmapper/#test.bg# deleted file mode 100644 index 6f68ad492..000000000 --- a/deeptools/test/test_heatmapper/#test.bg# +++ /dev/null @@ -1,48 +0,0 @@ -ch1 0 25 0 -ch1 25 50 0 -ch1 50 75 0 -ch1 75 100 0 -ch1 100 124 2 -ch1 125 149 0 -ch1 150 174 0 -ch1 175 199 0 -ch1 200 224 0 -ch1 225 249 0 -ch1 250 274 0 -ch1 275 299 0 -ch1 300 324 0 -ch1 325 349 0 -ch1 350 374 0 -ch1 375 399 0 -ch2 0 24 0 -ch2 25 49 0 -ch2 50 74 3 -ch2 75 99 0 -ch2 100 124 0 -ch2 125 149 0 -ch2 150 174 1 -ch2 175 199 0 -ch2 200 224 0 -ch2 225 249 0 -ch2 250 274 0 -ch2 275 299 0 -ch2 300 324 0 -ch2 325 349 0 -ch2 350 374 0 -ch2 375 399 0 -ch3 0 24 0 -ch3 25 49 0 -ch3 50 74 3 -ch3 75 99 0 -ch3 100 124 0 -ch3 125 149 0 -ch3 150 174 1 -ch3 175 199 0 -ch3 200 224 0 -ch3 225 249 0 -ch3 250 274 0 -ch3 275 299 0 -ch3 300 324 0 -ch3 325 349 0 -ch3 350 374 0 -ch3 375 399 0