Skip to content

Commit

Permalink
Merge branch 'dev' into 'master'
Browse files Browse the repository at this point in the history
Merge dev into master pre-release

See merge request research/pomoxis!97
  • Loading branch information
mwykes committed Feb 17, 2020
2 parents c493771 + 18bf180 commit 023a4cc
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 6 deletions.
12 changes: 10 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ CXX ?= g++
CONDA?=~/miniconda3/

# Builds a cache of binaries which can just be copied for CI
BINARIES=minimap2 miniasm racon samtools bcftools seqkit bedtools
BINARIES=minimap2 miniasm racon samtools bcftools seqkit bedtools bgzip tabix


BINCACHEDIR=bincache
Expand Down Expand Up @@ -68,9 +68,17 @@ $(BINCACHEDIR)/samtools: | $(BINCACHEDIR) $(BINBUILDDIR)
wget https://github.com/samtools/samtools/releases/download/${SAMVER}/samtools-${SAMVER}.tar.bz2; \
tar -xjf samtools-${SAMVER}.tar.bz2; \
fi
cd ${BINBUILDDIR}/samtools-${SAMVER} && make
# make all-htslib to get bgzip and tabix
cd ${BINBUILDDIR}/samtools-${SAMVER} && make all all-htslib
cp ${BINBUILDDIR}/samtools-${SAMVER}/samtools $@

# Copy the tabix binary into the binary cache. tabix is built inside the htslib
# directory bundled with the samtools source tree by the samtools rule
# (`make all all-htslib`), hence the order-only prerequisite on the cached
# samtools binary. $(@F) is the file-name part of the target, i.e. `tabix`.
$(BINCACHEDIR)/tabix: | $(BINCACHEDIR)/samtools
cp ${BINBUILDDIR}/samtools-${SAMVER}/htslib-${SAMVER}/$(@F) $@


# Copy the bgzip binary into the binary cache. bgzip is built inside the htslib
# directory bundled with the samtools source tree by the samtools rule
# (`make all all-htslib`), hence the order-only prerequisite on the cached
# samtools binary. $(@F) is the file-name part of the target, i.e. `bgzip`.
$(BINCACHEDIR)/bgzip: | $(BINCACHEDIR)/samtools
cp ${BINBUILDDIR}/samtools-${SAMVER}/htslib-${SAMVER}/$(@F) $@


BCFVER=1.7
$(BINCACHEDIR)/bcftools: | $(BINCACHEDIR) $(BINBUILDDIR)
Expand Down
2 changes: 1 addition & 1 deletion pomoxis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.3.1'
__version__ = '0.3.2'

import argparse
import os
Expand Down
48 changes: 45 additions & 3 deletions pomoxis/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from Bio import SeqIO
import intervaltree
import numpy as np
from pysam import FastxFile
import pandas as pd
import pysam

Region = namedtuple('Region', 'ref_name start end')
AlignPos = namedtuple('AlignPos', ('qpos', 'qbase', 'rpos', 'rbase'))
Expand Down Expand Up @@ -50,7 +51,7 @@ def split_fastx(fname, output, chunksize=10000):
:param chunksize: (maximum) length of output records.
"""
with open(output, 'w') as fout:
with FastxFile(fname, persist=False) as fin:
with pysam.FastxFile(fname, persist=False) as fin:
for rec in fin:
name = rec.name
seq = rec.sequence
Expand Down Expand Up @@ -259,7 +260,7 @@ def __call__(self, parser, namespace, values, option_string=None):

def get_seq_lens(fastx):
    """Get sequence lengths from fastx file.

    :param fastx: fastx file, passed straight to `pysam.FastxFile`.

    :returns: list of sequence lengths, one per record in file order.
    """
    # Use a context manager so the file handle is closed once all records
    # have been read, rather than being left for garbage collection.
    with pysam.FastxFile(fastx) as fh:
        return [len(rec.sequence) for rec in fh]


def coverage_from_fastx():
Expand Down Expand Up @@ -349,3 +350,44 @@ def intervaltrees_from_bed(path_to_bed):
for chrom, start, stop in yield_from_bed(path_to_bed):
trees[chrom].add(intervaltree.Interval(begin=start, end=stop))
return trees


def tag_bam():
    """Command line tool to add tags to a bam.

    Reads `input`, `output`, `tag_name` and `tag_value` from the command
    line, then writes every alignment of the input to the output with the
    tag `tag_name` set to the integer `tag_value`. The output is written
    in bam format and reuses the header of the input.
    """
    parser = argparse.ArgumentParser(
        prog='tag_bam',
        description='Add a tag to all alignments in a bam.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help='Input bam file.')
    parser.add_argument('output', help='Output bam file.')
    parser.add_argument('tag_name', help='Tag name.')
    parser.add_argument('tag_value', type=int, help='Tag value.')
    args = parser.parse_args()

    with pysam.AlignmentFile(args.input) as bam_in, \
            pysam.AlignmentFile(
                args.output, 'wb', header=bam_in.header) as bam_out:
        for read in bam_in:
            # set_tag overwrites any existing value stored under this tag.
            read.set_tag(args.tag_name, args.tag_value)
            bam_out.write(read)


def reverse_bed():
    """Convert bed-file coordinates to coordinates on the reverse strand.

    Command line tool. For every interval `(chrom, start, stop)` in the input
    bed file, writes `(chrom + '_rc', length - stop, length - start)` to the
    output bed file, where `length` is the length of `chrom` in the reference
    fasta. Only the first three columns of the input are used.

    :raises KeyError: if the bed file names a contig absent from the fasta.
    """
    parser = argparse.ArgumentParser(
        prog='reverse_bed',
        description='Convert bed-file coordinates to coordinates on the reverse strand.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('bed_in', help='Input bed file.')
    parser.add_argument('ref_fasta', help='Input reference fasta file.')
    parser.add_argument('bed_out', help='Output bed file.')
    args = parser.parse_args()

    # Only the contig lengths are needed, so close the fasta straight away.
    with pysam.FastaFile(args.ref_fasta) as fasta:
        lengths = dict(zip(fasta.references, fasta.lengths))

    # usecols: without it, a bed file with more than three columns has its
    # leading columns silently consumed as the index, shifting the names
    # onto the wrong data.
    # dtype: keep contig names as strings so that numeric names (e.g. '1')
    # still match the fasta and can be suffixed with '_rc'.
    d = pd.read_csv(
        args.bed_in, sep='\t', header=None, usecols=[0, 1, 2],
        names=['chrom', 'start', 'stop'], dtype={'chrom': str})

    # Fail with an explicit message rather than a bare KeyError from a lambda.
    missing = sorted(set(d['chrom']) - set(lengths))
    if missing:
        raise KeyError(
            'Contigs in bed file not found in reference fasta: {}'.format(
                ', '.join(missing)))

    d['chrom_length'] = d['chrom'].map(lengths)
    # Reversing the strand swaps the roles of start and stop.
    d['rc_stop'] = d['chrom_length'] - d['start']
    d['rc_start'] = d['chrom_length'] - d['stop']
    d['chrom_rc'] = d['chrom'] + '_rc'
    d[['chrom_rc', 'rc_start', 'rc_stop']].to_csv(
        args.bed_out, index=False, header=False, sep='\t')
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,12 @@
'long_fastx = {}.util:extract_long_reads'.format(__pkg_name__),
'pomoxis_path = {}:show_prog_path'.format(__pkg_name__),
'qscores_from_summary = {}.qscores_from_summary:main'.format(__pkg_name__),
'reverse_bed = {}.util:reverse_bed'.format(__pkg_name__),
'split_fastx = {}.util:split_fastx_cmdline'.format(__pkg_name__),
'stats_from_bam = {}.stats_from_bam:main'.format(__pkg_name__),
'subsample_bam = {}.subsample_bam:main'.format(__pkg_name__),
'summary_from_stats = {}.summary_from_stats:main'.format(__pkg_name__),
'tag_bam = {}.util:tag_bam'.format(__pkg_name__),
'trim_alignments = {}.trim_alignments:main'.format(__pkg_name__),
'ref_seqs_from_bam = {}.ref_seqs_from_bam:main'.format(__pkg_name__),
'find_indels = {}.find_indels:main'.format(__pkg_name__),
Expand Down

0 comments on commit 023a4cc

Please sign in to comment.