From 670b39f289ddef643f13c4f30f8e970f73a9f1ff Mon Sep 17 00:00:00 2001 From: tombch Date: Wed, 11 Oct 2023 12:19:44 +0100 Subject: [PATCH 1/2] Annotated bases flag on api.query --- Cargo.lock | 2 +- Cargo.toml | 2 +- python/maptide/api.py | 21 +++++++++++++++++---- python/maptide/cli.py | 17 +++-------------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eab8baf..0c6b541 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -186,7 +186,7 @@ dependencies = [ [[package]] name = "maptide" -version = "0.2.1" +version = "0.3.0" dependencies = [ "noodles", "pyo3", diff --git a/Cargo.toml b/Cargo.toml index 7b75d23..1b8f5b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "maptide" -version = "0.2.1" +version = "0.3.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/python/maptide/api.py b/python/maptide/api.py index 3dca431..b0e8262 100644 --- a/python/maptide/api.py +++ b/python/maptide/api.py @@ -1,15 +1,19 @@ import os -from typing import Dict, Tuple, List, Optional +from typing import Dict, Tuple, Optional, Any from . import maptide #  type: ignore +BASES = ["A", "C", "G", "T", "DS", "N"] + + def query( bam: str, region: Optional[str] = None, bai: Optional[str] = None, mapping_quality: int = 0, base_quality: int = 0, -) -> Dict[str, Dict[Tuple[int, int], List[int]]]: + annotated: bool = False, +) -> Dict[str, Dict[Tuple[int, int], Any]]: """Performs a pileup over a region, obtaining per-position base frequencies for the provided BAM file. Parameters @@ -24,6 +28,8 @@ def query( Minimum mapping quality for a read to be included in the pileup (default: 0) base_quality : int, optional Minimum base quality for a base within a read to be included in the pileup (default: 0) + annotated : bool, optional + Return frequencies annotated with their bases, as a `dict[str, int]`. Default is to return frequencies only, as a `list[int]` (default: False) Returns ------- @@ -34,9 +40,16 @@ def query( if region: if not bai and os.path.isfile(bam + ".bai"): bai = bam + ".bai" - return maptide.query(bam, bai, region, mapping_quality, base_quality) + data = maptide.query(bam, bai, region, mapping_quality, base_quality) else: - return maptide.all(bam, mapping_quality, base_quality) + data = maptide.all(bam, mapping_quality, base_quality) + + if annotated: + for _, positions in data.items(): + for position, frequencies in positions.items(): + positions[position] = dict(zip(BASES, frequencies)) + + return data def parse_region(region: str) -> Tuple[str, int, int]: diff --git a/python/maptide/cli.py b/python/maptide/cli.py index f9b1edd..f25384d 100644 --- a/python/maptide/cli.py +++ b/python/maptide/cli.py @@ -108,23 +108,12 @@ def run(): "pos", "ins", "cov", - "a", - "c", - "g", - "t", - "ds", - "n", - ] + ] + [base.lower() for base in api.BASES] if args.stats: columns.extend( - [ - "pc_a", - "pc_c", - "pc_g", - "pc_t", - "pc_ds", - "pc_n", + [f"pc_{base.lower()}" for base in api.BASES] + + [ "entropy", "secondary_entropy", ] From d26c030fe67ddc6c25377b52d6abfb2ae1d4634c Mon Sep 17 00:00:00 2001 From: tombch Date: Wed, 11 Oct 2023 12:19:59 +0100 Subject: [PATCH 2/2] Updated README with python script example --- README.md | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4aad681..d97d23a 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,12 @@ ``` $ pip install maptide ``` -Depending on your operating system, the Rust compiler may need to be installed. - -Installation instructions for the Rust compiler can be found here: https://www.rust-lang.org/tools/install #### Build from source Building from source requires the Rust compiler. +Installation instructions for the Rust compiler can be found here: https://www.rust-lang.org/tools/install + Once the Rust compiler is installed: ``` $ git clone https://github.com/CLIMB-COVID/maptide.git @@ -46,12 +45,12 @@ options: Number of decimal places to display (default: 3) ``` -#### Frequencies over all positions: +#### Frequencies over all positions ``` $ maptide /path/to/file.bam ``` -#### Frequencies over a region: +#### Frequencies over a region ``` $ maptide /path/to/file.bam --region chrom:start-end ``` @@ -63,3 +62,25 @@ Index files that do not follow the naming convention `/path/to/file.bam.bai` can ``` $ maptide /path/to/file.bam --region chrom:start-end --index /path/to/index.bai ``` + +#### Example in Python +`maptide` can be used within Python scripts: + +```python +import maptide + +data = maptide.query( + "path/to/file.bam", + region="MN908947.3:100-200", # Obtain frequencies only in this region + annotated=True, # Annotate frequencies with their bases A,C,G,T,DS,N +) + +chrom = "MN908947.3" # Chosen reference/chromosome in the BAM to access +position = (100, 0) # Position 100, insert 0 (i.e. not an insertion) + +# With annotated = True, frequencies are annotated with their bases: +frequencies = data[chrom][position] +print(frequencies) # {'A': 1, 'C': 122, 'G': 0, 'T': 1, 'DS': 13, 'N': 0} + +# If annotated = False, frequencies would be a list i.e. [1, 122, 0, 1, 13, 0] +``` \ No newline at end of file