Skip to content

Commit

Permalink
added c3 trimming
Browse files Browse the repository at this point in the history
  • Loading branch information
JLSteenwyk committed Feb 12, 2024
1 parent 78a6402 commit 5242d1f
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 6 deletions.
6 changes: 6 additions & 0 deletions clipkit/args_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ def process_args(args) -> dict:
quiet = args.quiet or False
sequence_type = SeqType(args.sequence_type.lower()) if args.sequence_type else None

if codon and mode == TrimmingMode.c3:
logger.warning(
"C3 and codon-based trimming are incompatible.\nCodon-based trimming removes whole codons while C3 removes every third codon position."
)
sys.exit()

return dict(
input_file=input_file,
output_file=output_file,
Expand Down
1 change: 1 addition & 0 deletions clipkit/modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ class TrimmingMode(Enum):
kpic = "kpic"
kpic_gappy = "kpic-gappy"
kpic_smart_gap = "kpic-smart-gap"
c3 = "c3"
11 changes: 7 additions & 4 deletions clipkit/msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,12 @@ def determine_site_positions_to_trim(self, mode, gap_threshold, codon=False):
(sites_to_trim_gaps_based, sites_to_trim_classification_based)
)
)

if codon:
elif mode == TrimmingMode.c3:
sites_to_trim = np.arange(3, self._original_length + 1, 3) - 1
if codon and mode != TrimmingMode.c3:
"""
NOTE: ignoring c3 mode otherwise we would ALWAYS trim the entire file by definition.
For each position in sites_to_trim we need the full triplet of codon positions tuple.
Example:
[2, 9] -> [1, 2, 3, 7, 8, 9]
Expand Down Expand Up @@ -258,8 +261,8 @@ def determine_codon_triplet_positions(self, alignment_position):
We filter to make sure we are not including any positions out of range
"""
block = alignment_position // 3
codon_triplet_index_start = block * 3
block = alignment_position // self._codon_size
codon_triplet_index_start = block * self._codon_size
sites = [
codon_triplet_index_start,
codon_triplet_index_start + 1,
Expand Down
4 changes: 3 additions & 1 deletion clipkit/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def create_parser() -> ArgumentParser:
kpic-gappy,
kpi,
kpi-smart-gap,
kpi-gappy>
kpi-gappy,
c3>
-g, --gaps <threshold_of_gaps> specifies gaps threshold
(default: 0.9)
Expand Down Expand Up @@ -116,6 +117,7 @@ def create_parser() -> ArgumentParser:
kpi: keep only parsimony informative sites
kpi-smart-gap: a combination of kpi- and smart-gap-based trimming
kpi-gappy: a combination of kpi- and gappy-based trimming
c3: remove every third codon position
Gaps
Positions with gappyness greater than threshold will be trimmed.
Expand Down
2 changes: 1 addition & 1 deletion clipkit/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.1.3"
__version__ = "2.2.0"
10 changes: 10 additions & 0 deletions tests/integration/expected/simple.fa_c3
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>1
A-TA
>2
A--A
>3
A--T
>4
AG-T
>5
AC-T
10 changes: 10 additions & 0 deletions tests/integration/samples/simple.fa.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>1
A-TA
>2
A--A
>3
A--T
>4
AG-T
>5
AC-T
44 changes: 44 additions & 0 deletions tests/integration/test_c3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pytest
from pathlib import Path

from clipkit.clipkit import execute
from clipkit.files import FileFormat
from clipkit.modes import TrimmingMode
from clipkit.settings import DEFAULT_AA_GAP_CHARS, DEFAULT_NT_GAP_CHARS

here = Path(__file__)


@pytest.mark.integration
class TestC3Out(object):
    """Integration test for the ``c3`` trimming mode."""

    def test_simple_c3(self):
        """
        Run clipkit on samples/simple.fa with mode=TrimmingMode.c3 (codon
        trimming disabled) and check that the written FASTA file is identical
        to the stored expectation in expected/simple.fa_c3.
        """
        # Relative path: resolved against the directory the tests run from.
        trimmed_path = "output/simple.fa_c3"

        execute(
            input_file=f"{here.parent}/samples/simple.fa",
            output_file=trimmed_path,
            input_file_format="fasta",
            output_file_format="fasta",
            sequence_type=None,
            complement=False,
            codon=False,
            gaps=None,
            mode=TrimmingMode.c3,
            use_log=False,
            gap_characters=DEFAULT_NT_GAP_CHARS,
            quiet=True,
        )

        # Read the reference first, then the freshly produced output.
        expected_content = Path(f"{here.parent}/expected/simple.fa_c3").read_text()
        output_content = Path(trimmed_path).read_text()

        assert expected_content == output_content
6 changes: 6 additions & 0 deletions tests/unit/test_args_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,9 @@ def test_process_args_expected_keywords(self, args):
"quiet",
]
assert sorted(res.keys()) == sorted(expected_keys)

def test_incompatible_codon_args(self, args):
    """
    Requesting codon-based trimming together with the c3 mode must make
    process_args terminate the program (SystemExit).
    """
    # The two options are set independently on the shared args fixture.
    args.mode = TrimmingMode.c3
    args.codon = True

    with pytest.raises(SystemExit):
        process_args(args)

0 comments on commit 5242d1f

Please sign in to comment.