Skip to content

Commit

Permalink
updating so that merge actually works and doesn't include that revers…
Browse files Browse the repository at this point in the history
…ion commit
  • Loading branch information
aineniamh committed Apr 12, 2024
1 parent f9e0182 commit ee87ac4
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 17 deletions.
22 changes: 15 additions & 7 deletions snipit/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys
import os
import argparse
import textwrap
import pkg_resources
import collections

Expand All @@ -27,6 +28,7 @@ def main(sysargs = sys.argv[1:]):

i_group = parser.add_argument_group('Input options')
i_group.add_argument('alignment',help="Input alignment fasta file")
i_group.add_argument("-t","--sequence-type", choices=['nt','aa'], action="store",help="Input sequence type: aa or nt", default="nt", dest="sequence_type")
i_group.add_argument("-r","--reference", action="store",help="Indicates which sequence in the alignment is\nthe reference (by sequence ID).\nDefault: first sequence in alignment", dest="reference")
i_group.add_argument("-l","--labels", action="store",help="Optional csv file of labels to show in output snipit plot. Default: sequence names", dest="labels")
i_group.add_argument("--l-header", action="store",help="Comma separated string of column headers in label csv. First field indicates sequence name column, second the label column. Default: 'name,label'", dest="label_headers",default="name,label")
Expand All @@ -47,7 +49,10 @@ def main(sysargs = sys.argv[1:]):
f_group.add_argument("--width",action="store",type=float,help="Overwrite the default figure width",default=0)
f_group.add_argument("--size-option",action="store",help="Specify options for sizing. Options: expand, scale",dest="size_option",default="scale")
f_group.add_argument("--solid-background",action="store_true",help="Force the plot to have a solid background, rather than a transparent one.",dest="solid_background")
f_group.add_argument("-c","--colour-palette",dest="colour_palette",action="store",help="Specify colour palette. Options: primary, classic, purine-pyrimidine, greyscale, wes, verity",default="classic")
f_group.add_argument("-c","--colour-palette",dest="colour_palette",action="store",
help="Specify colour palette. Options: [classic, classic_extended, primary, purine-pyrimidine, greyscale, wes, verity, ugene]. Use ugene for protein alignments.",default="classic",
choices=["classic","classic_extended","primary","purine-pyrimidine","greyscale","wes","verity","ugene"],
metavar='')
f_group.add_argument("--flip-vertical",action='store_true',help="Flip the orientation of the plot so sequences are below the reference rather than above it.",dest="flip_vertical")
f_group.add_argument("--sort-by-mutation-number", action='store_true',
help="Render the graph with sequences sorted by the number of SNPs relative to the reference (fewest to most). Default: False", dest="sort_by_mutation_number")
Expand All @@ -63,8 +68,11 @@ def main(sysargs = sys.argv[1:]):
s_group.add_argument("--show-indels",action='store_true',help="Include insertion and deletion mutations in snipit plot.",dest="show_indels")
s_group.add_argument('--include-positions', dest='included_positions', type=sfunks.bp_range, nargs='+', default=None, help="One or more range (closed, inclusive; one-indexed) or specific position only included in the output. Ex. '100-150' or Ex. '100 101' Considered before '--exclude-positions'.")
s_group.add_argument('--exclude-positions', dest='excluded_positions', type=sfunks.bp_range, nargs='+', default=None, help="One or more range (closed, inclusive; one-indexed) or specific position to exclude in the output. Ex. '100-150' or Ex. '100 101' Considered after '--include-positions'.")
s_group.add_argument("--exclude-ambig-pos",dest="exclude_ambig_pos",action='store_true',help="Exclude positions with ambig base in any sequences. Considered after '--include-positions'")

s_group.add_argument("--ambig-mode", dest="ambig_mode",choices=['all', 'snps', 'exclude'], default='snpsambi',
help=textwrap.dedent('''Controls how ambiguous bases are handled -
[all] include all ambig such as N,Y,B in all positions;
[snps] only include ambig if a snp is present at the same position;
[exclude] remove all ambig, same as depreciated --exclude-ambig-pos'''))
misc_group = parser.add_argument_group('Misc options')
misc_group.add_argument("-v","--version", action='version', version=f"snipit {__version__}")

Expand Down Expand Up @@ -98,9 +106,9 @@ def main(sysargs = sys.argv[1:]):

reference,alignment = sfunks.get_ref_and_alignment(args.alignment,ref_input,label_map)

snp_dict,record_snps,num_snps = sfunks.find_snps(reference,alignment,args.show_indels)
snp_dict,record_snps,num_snps = sfunks.find_snps(reference,alignment,args.show_indels,args.sequence_type,args.ambig_mode)

record_ambs = sfunks.find_ambiguities(alignment, snp_dict)
record_ambs = sfunks.find_ambiguities(alignment, snp_dict, args.sequence_type)

colours = sfunks.get_colours(args.colour_palette)

Expand All @@ -125,14 +133,14 @@ def main(sysargs = sys.argv[1:]):
args.flip_vertical,
args.included_positions,
args.excluded_positions,
args.exclude_ambig_pos,
args.ambig_mode,
args.sort_by_mutation_number,
args.high_to_low,
args.sort_by_id,
args.sort_by_mutations,
args.recombi_mode,
args.recombi_references)

print(sfunks.green(f"Snipping Complete: {output}"))

if __name__ == '__main__':
main()
45 changes: 35 additions & 10 deletions snipit/scripts/snp_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import warnings
warnings.filterwarnings('ignore')


# imports from other modules
from Bio import SeqIO
from Bio.Seq import Seq
Expand All @@ -35,6 +34,11 @@
CYAN = '\u001b[36m'
DIM = '\033[2m'

NT_BASES = ["A","T","G","C"]
NT_AMBIG = ["W","S","M","K","R","Y","B","D","H","V","N"]
AA_BASES = ["A","R","N","D","C","Q","E","G","H","I","L","K","M","F","P","S","T","W","Y","V"]
AA_AMBIG = ["X","B","Z","J"]


def bp_range(s):
"""
Expand Down Expand Up @@ -224,8 +228,24 @@ def merge_indels(indel_list,prefix):

return indel_list

def find_snps(reference_seq,input_seqs,show_indels):
non_amb = ["A","T","G","C"]
def find_snps(reference_seq,input_seqs,show_indels,sequence_type,ambig_mode):

# set the appropriate genetic code to use for snp calling
if sequence_type == 'nt':
if ambig_mode == 'snps':
gcode = NT_BASES
elif ambig_mode == 'all':
gcode = NT_BASES + NT_AMBIG
else: # exclude
gcode = NT_BASES
if sequence_type == 'aa':
if ambig_mode == 'snps':
gcode = AA_BASES
elif ambig_mode == 'all':
gcode = AA_BASES + AA_AMBIG
else: #exclude
gcode = AA_BASES

snp_dict = {}

record_snps = {}
Expand All @@ -237,7 +257,7 @@ def find_snps(reference_seq,input_seqs,show_indels):
for i in range(len(query_seq)):
bases = [query_seq[i],reference_seq[i]]
if bases[0] != bases[1]:
if bases[0] in non_amb and bases[1] in non_amb:
if bases[0] in gcode and bases[1] in gcode:

snp = f"{i+1}:{bases[1]}{bases[0]}" # position-reference-query

Expand Down Expand Up @@ -268,15 +288,13 @@ def find_snps(reference_seq,input_seqs,show_indels):

return snp_dict,record_snps,len(var_counter)


def find_ambiguities(alignment, snp_dict,sequence_type):

if sequence_type == "nt":
amb = NT_AMBIG
if sequence_type == "aa":
amb = AA_AMBIG


snp_sites = collections.defaultdict(list)
for seq in snp_dict:
snps = snp_dict[seq]
Expand All @@ -295,7 +313,7 @@ def find_ambiguities(alignment, snp_dict,sequence_type):
for i in snp_sites:
bases = [query_seq[i],snp_sites[i]] #if query not same as ref allele
if bases[0] != bases[1]:
if bases[0] not in ["A","T","G","C"]:
if bases[0] not in amb:

snp = f"{i+1}:{bases[1]}{bases[0]}" # position-outgroup-query
snps.append(snp)
Expand Down Expand Up @@ -363,7 +381,6 @@ def make_graph(num_seqs,
flip_vertical=False,
included_positions=None,
excluded_positions=None,
exclude_ambig_pos=False,
sort_by_mutation_number=False,
high_to_low=True,
sort_by_id=False,
Expand Down Expand Up @@ -464,7 +481,7 @@ def make_graph(num_seqs,
x_position = int(pos)

# if positions with any ambiguities should be ignored, note the position
if exclude_ambig_pos:
if ambig_mode == 'exclude':
excluded_positions.add(x_position)
else:
ref = var[0]
Expand Down Expand Up @@ -649,13 +666,21 @@ def make_graph(num_seqs,
def get_colours(colour_palette):

palettes = {"classic": {"A":"steelblue","C":"indianred","T":"darkseagreen","G":"skyblue"},
"classic_extended": {"A":"steelblue","C":"indianred","T":"darkseagreen",
"G":"skyblue","W":"#FFCC00","S":"#66FF00","M":"#6600FF",
"K":"#66FFCC","R":"#FF00FF","Y":"#FFFF99","B":"#CCFF99",
"D":"#FFFF00","H":"##33FF00","V":"#FF6699","N":"#333333"},
"wes": {"A":"#CC8B3C","C":"#456355","T":"#541F12","G":"#B62A3D"},
"primary": {"A":"green","C":"goldenrod","T":"steelblue","G":"indianred"},
"purine-pyrimidine":{"A":"indianred","C":"teal","T":"teal","G":"indianred"},
"greyscale":{"A":"#CCCCCC","C":"#999999","T":"#666666","G":"#333333"},
"blues":{"A":"#3DB19D","C":"#76C5BF","T":"#423761","G":"steelblue"},
"verity":{"A":"#EC799A","C":"#df6eb7","T":"#FF0080","G":"#9F0251"},
"recombi":{"lineage_1":"steelblue","lineage_2":"#EA5463","Both":"darkseagreen","Private":"goldenrod"}
"recombi":{"lineage_1":"steelblue","lineage_2":"#EA5463","Both":"darkseagreen","Private":"goldenrod"},
"ugene":{"A":"#00ccff","R":"#d5c700","N":"#33ff00","D":"#ffff00","C":"#6600ff","Q":"#3399ff",
"E":"#c0bdbb","G":"#ff5082","H":"#fff233","I":"#00abed","L":"#008fc6","K":"#ffee00",
"M":"#1dc0ff","F":"#3df490","P":"#d5426c","S":"#ff83a7","T":"#ffd0dd","W":"#33cc78",
"Y":"#65ffab","V":"#ff6699","X":"#999999","B":"#999999","Z":"#999999","J":"#999999"}
}
if colour_palette not in palettes:
sys.stderr.write(red(f"Error: please select one of {palettes} for --colour-palette option\n"))
Expand Down

0 comments on commit ee87ac4

Please sign in to comment.