Skip to content

Commit

Permalink
Merge branch 'feature/plotFingerprint_bigWig' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
dpryan79 committed Nov 2, 2016
2 parents 742c148 + 0cb808a commit 935ea44
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 16 deletions.
63 changes: 50 additions & 13 deletions deeptools/countReadsPerBin.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from deeptools import bamHandler
from deeptools import mapReduce
from deeptoolsintervals import GTF
import pyBigWig

debug = 0
old_settings = np.seterr(all='ignore')
Expand Down Expand Up @@ -235,7 +236,13 @@ def run(self, allArgs=None):
# workers for analysis. If too short, too much time is spend loading the files
# if too long, some processors end up free.
# the following values are empirical
bamFilesHandlers = [bamHandler.openBam(x) for x in self.bamFilesList]
bamFilesHandlers = []
for x in self.bamFilesList:
try:
y = bamHandler.openBam(x)
except:
y = pyBigWig.open(x)
bamFilesHandlers.append(y)
chromSizes, non_common = deeptools.utilities.getCommonChrNames(bamFilesHandlers, verbose=self.verbose)

# skip chromosome in the list. This is usually for the
Expand Down Expand Up @@ -263,14 +270,28 @@ def run(self, allArgs=None):
min_num_of_samples = int(genomeSize / np.mean(chrLengths))
raise ValueError("numberOfSamples has to be bigger than {} ".format(min_num_of_samples))

max_mapped = max([x.mapped for x in bamFilesHandlers])

reads_per_bp = float(max_mapped) / genomeSize
# chunkSize = int(100 / ( reads_per_bp * len(bamFilesList)) )

chunkSize = int(self.stepSize * 1e3 / (reads_per_bp * len(bamFilesHandlers)))
max_mapped = []
for x in bamFilesHandlers:
try:
max_mapped.append(x.mapped)
except:
# bigWig, use a fixed value
max_mapped.append(0)
max_mapped = max(max_mapped)

# If max_mapped is 0 (i.e., bigWig input), set chunkSize to a multiple of binLength and use every bin
if max_mapped == 0:
chunkSize = 10000 * self.binLength
self.stepSize = self.binLength
else:
reads_per_bp = float(max_mapped) / genomeSize
chunkSize = int(self.stepSize * 1e3 / (reads_per_bp * len(bamFilesHandlers)))
[bam_h.close() for bam_h in bamFilesHandlers]

# Ensure that chunkSize is always at least self.stepSize
if chunkSize < self.stepSize:
chunkSize = self.stepSize

if self.verbose:
print("step size is {}".format(self.stepSize))

Expand Down Expand Up @@ -388,7 +409,12 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None):

start_time = time.time()

bam_handlers = [bamHandler.openBam(bam) for bam in self.bamFilesList]
bam_handlers = []
for fname in self.bamFilesList:
try:
bam_handlers.append(bamHandler.openBam(fname))
except:
bam_handlers.append(pyBigWig.open(fname))

blackList = None
if self.blackListFileName is not None:
Expand Down Expand Up @@ -537,11 +563,22 @@ def get_coverage_of_region(self, bamHandle, chrom, regions,
start_time = time.time()
# caching seems faster. TODO: profile the function
c = 0
if chrom in bamHandle.references:
reads = [r for r in bamHandle.fetch(chrom, regStart, regEnd)
if r.flag & 4 == 0]
else:
raise NameError("chromosome {} not found in bam file".format(chrom))
try:
# BAM input
if chrom in bamHandle.references:
reads = [r for r in bamHandle.fetch(chrom, regStart, regEnd)
if r.flag & 4 == 0]
else:
raise NameError("chromosome {} not found in bam file".format(chrom))
except:
# bigWig input, as used by plotFingerprint
if bamHandle.chroms(chrom):
_ = np.array(bamHandle.stats(chrom, regStart, regEnd, type="mean", nBins=nRegBins), dtype=np.float)
_[np.isnan(_)] = 0.0
coverages += _
continue
else:
raise NameError("chromosome {} not found in bigWig file with chroms {}".format(chrom, bamHandle.chroms()))

prev_start_pos = None # to store the start positions
# of previous processed read pair
Expand Down
3 changes: 2 additions & 1 deletion deeptools/plotFingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def signalAndBinDist(x):

# Compute the JSD from the PMFs
M = (PMFinput + PMFchip) / 2.0
JSD = 0.5 * (np.sum(PMFinput * np.log2(PMFinput / M))) + 0.5 * (np.sum(PMFchip * np.log2(PMFchip / M)))
JSD = 0.5 * (np.nansum(PMFinput * np.log2(PMFinput / M))) + 0.5 * (np.nansum(PMFchip * np.log2(PMFchip / M)))

return np.sqrt(JSD)

Expand Down Expand Up @@ -424,5 +424,6 @@ def main(args=None):
args.outQualityMetrics.write("\n")
args.outQualityMetrics.close()


if __name__ == "__main__":
main()
8 changes: 6 additions & 2 deletions deeptools/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,12 @@ def get_chrom_and_size(bam_handler):
:param bam_handler:
:return: list of (chrom, size) tuples
"""
return [(bam_handler.references[i], bam_handler.lengths[i])
for i in range(0, len(bam_handler.references))]
try:
# BAM file
return [(bam_handler.references[i], bam_handler.lengths[i])
for i in range(0, len(bam_handler.references))]
except:
return [(k, v) for k, v in bam_handler.chroms().items()]

def print_chr_names_and_size(chr_set):
sys.stderr.write("chromosome\tlength\n")
Expand Down

0 comments on commit 935ea44

Please sign in to comment.