Skip to content

Commit

Permalink
new color scheme mode in alignedSeq
Browse files Browse the repository at this point in the history
  • Loading branch information
CaoTianze committed Aug 24, 2023
1 parent 6e26fd0 commit fce95f9
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 48 deletions.
40 changes: 31 additions & 9 deletions docs/geom_alignedSeq.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
# geom_alignedSeq
A class that represents the sequence alignment diagram
## *class* geom_alignedSeq(self,data: Union[list[str], dict, None] = None,seq_names: Optional[list[str]] = None,seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.75,font_col: str = '#FFFFFF',bg_col_scheme: Union[DataFrame, str] = 'AUTO',bg_low_col: str = 'black',bg_high_col: str = 'yellow',bg_na_col: str = '#333333',**kwargs: Any)
## *Sample code*
```python
from plotnine import ggplot, coord_fixed
from plotnineseqsuite import geom_alignedSeq, theme_seq
from plotnineseqsuite.data import seqs_dna
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + theme_seq() + coord_fixed()
```
## *Init param*
```
geom_alignedSeq(self,
data: list[str] | dict | None = None,
seq_names: list[str] | None = None,
seq_type: str = 'AUTO',
namespace: list[str] | None = None,
font: str = 'roboto_medium',
stack_width: float = 0.75,
border_col: str = 'grey',
scheme_applied: str = 'BACKGROUND',
no_scheme_col: str = '#000000',
col_scheme: DataFrame | str = 'AUTO',
low_col: str = 'black',
high_col: str = 'yellow',
na_col: str = '#333333',
**kwargs: Any) -> Any
```
- data
Sequence data or corresponding dict.
- seq_names
Expand All @@ -19,16 +37,20 @@ The letter corresponding to the data.
Font value
- stack_width
The ratio of the size of letters to the standard unit width.
- font_col
The color of the font.
- bg_col_scheme
Color scheme of the backgrounds.
- bg_low_col
- border_col
The color of the background border. When it is None, the background is drawn without a border.
- scheme_applied
BACKGROUND or LETTER. Indicates the target to which the color scheme applies.
- no_scheme_col
When a color scheme is applied to the background, this indicates the color of the letters. When a color scheme is applied to letters, this indicates the color of the background.
- col_scheme
Color scheme.
- low_col
The color at the low end of the gradient. Only used with continuous color schemes.
- bg_high_col
- high_col
The color at the high end of the gradient. Only used with continuous color schemes.
- bg_na_col
Used when the background in the corresponding namespace do not have a color matching value defined.
- na_col
Used when a letter in the corresponding namespace does not have a matching color value defined.
- kwargs
Other arguments passed on to layer().
## *properties*
Expand Down
18 changes: 17 additions & 1 deletion docs/geom_logo.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
# geom_logo
A class that represents the sequence logo
## *class* geom_logo(self,data: Union[list[str], ndarray, dict, None] = None,method: str = 'bits',seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.95,rev_stack_order: bool = False,col_scheme: Union[DataFrame, str] = 'AUTO',low_col: str = 'black',high_col: str = 'yellow',na_col: str = '#333333',**kwargs: Any)
## *Sample code*
```python
from plotnine import ggplot
from plotnineseqsuite import geom_logo, theme_seq
from plotnineseqsuite.data import seqs_dna
ggplot() + geom_logo(seqs_dna['MA0001.1']) + theme_seq()
```
## *Init param*
```
geom_logo(self,
data: list[str] | ndarray | dict | None = None,
method: str = 'bits',
seq_type: str = 'AUTO',
namespace: list[str] | None = None,
font: str = 'roboto_medium',
stack_width: float = 0.95,
rev_stack_order: bool = False,
col_scheme: DataFrame | str = 'AUTO',
low_col: str = 'black',
high_col: str = 'yellow',
na_col: str = '#333333',
**kwargs: Any) -> Any
```
- data
Sequence data or PFM or corresponding dict.
- method
Expand Down
17 changes: 16 additions & 1 deletion docs/geom_seqBar.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# geom_seqBar
A class that represents the sequence histogram
## *class* geom_seqBar(self,data: Union[list[str], ndarray, dict, None] = None,seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.75,bar_col_scheme: Union[DataFrame, str] = 'AUTO',font_col: str = '#808080',low_col: str = 'black',high_col: str = 'yellow',na_col: str = '#333333',**kwargs: Any)
## *Sample code*
```python
from plotnine import ggplot
from plotnineseqsuite import geom_seqBar, theme_seq
from plotnineseqsuite.data import seqs_dna
ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + theme_seq()
```
## *Init param*
```
geom_seqBar(self,
data: list[str] | ndarray | dict | None = None,
seq_type: str = 'AUTO',
namespace: list[str] | None = None,
font: str = 'roboto_medium',
stack_width: float = 0.75,
bar_col_scheme: DataFrame | str = 'AUTO',
font_col: str = '#808080',
low_col: str = 'black',
high_col: str = 'yellow',
na_col: str = '#333333',
**kwargs: Any) -> Any
```
- data
Sequence data or PFM or corresponding dict.
- seq_type
Expand Down
18 changes: 13 additions & 5 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,25 @@ geom_alignedSeq accepts two types of input, each described in detail below
1. list: a list of aligned sequences
2. dict: used for plotting more than one sequence logo at the same time with the help of facets. The key of the dict is the facet value, and the value of the dict is a list as described above.
### No background color
When bg_col_scheme option is None, the picture has no background color.
When the col_scheme option is None, the picture has no background color.
```python
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font_col='black', bg_col_scheme=None) + theme_seq() + coord_fixed()
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], no_scheme_col='black', col_scheme=None) + theme_seq() + coord_fixed()
```
![](no_background_color.png)
### No sequence letter
When font option is None, the picture has only the background color.
When both the font option and the border_col option are None, the picture has only the background color.
```python
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font=None) + theme_seq() + coord_fixed()
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font=None, border_col=None) + theme_seq() + coord_fixed()
```
![](no_sequence_letter.png)

### The color scheme switches to letters
When the scheme_applied option is LETTER, the color scheme is applied to the letters instead of the background.
```python
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], no_scheme_col='white', scheme_applied='LETTER') + theme_seq() + coord_fixed()
```
![](switch_color_schme.png)

### Tagging sequences
You can set seq_names parameter to identify the name of the sequence.
```python
Expand Down Expand Up @@ -133,7 +141,7 @@ ggplot() + geom_logo(seqs_numeric, method='probability', namespace=['δ','ε','
![](custom_alphabet_2.png)
## Colour schemes
### Preset color schemes
plotnineSeqSuite has preset color schemes that can be set using the col_scheme parameter in geom_logo, the parameter bar_col_scheme in geom_seqBar and the parameter bg_col_scheme in geom_alignedSeq. By default, the col_scheme is set to AUTO such that the color scheme is automatically chosen based on your sequence type.
plotnineSeqSuite has preset color schemes that can be set using the col_scheme parameter in geom_logo, the bar_col_scheme parameter in geom_seqBar and the col_scheme parameter in geom_alignedSeq. By default, col_scheme is set to AUTO so that the color scheme is chosen automatically based on your sequence type.

Let's try generating an amino acid sequence logo using kinase-substrate phosphorylation data:
```python
Expand Down
Binary file removed docs/index.pdf
Binary file not shown.
Binary file modified docs/no_sequence_letter.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/plot_an_aligned_sequences.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/switch_color_schme.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
80 changes: 52 additions & 28 deletions plotnineseqsuite/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ class geom_alignedSeq:
def __init__(self, data: Union[List[str], Dict] = None, seq_names: List[str] = None, seq_type: str = 'AUTO',
namespace: List[str] = None,
font: str = 'roboto_medium', stack_width: float = 0.75, border_col: str = 'grey',
font_col: str = '#000000', bg_col_scheme: Union[DataFrame, str] = 'AUTO',
bg_low_col: str = 'black', bg_high_col: str = 'yellow', bg_na_col: str = '#333333',
scheme_applied: str = 'BACKGROUND', no_scheme_col: str = '#000000',
col_scheme: Union[DataFrame, str] = 'AUTO',
low_col: str = 'black', high_col: str = 'yellow', na_col: str = '#333333',
**kwargs):
self.__kwargs = kwargs
self.__font_col = font_col
self.__no_scheme_col = no_scheme_col
self.__border_col = border_col
self.__scheme_applied = scheme_applied
if stack_width > 1 or stack_width <= 0:
raise Exception('"stack_width" must be between 0 and 1')
if data is None:
Expand All @@ -31,6 +33,8 @@ def __init__(self, data: Union[List[str], Dict] = None, seq_names: List[str] = N
seq_type = 'OTHER'
if seq_type not in {'OTHER', 'AUTO', 'AA', 'DNA', 'RNA'}:
raise Exception("seq_type must be one of 'OTHER' or 'AUTO', or 'AA', or 'DNA', or 'RNA'")
if scheme_applied not in {'BACKGROUND', 'LETTER'}:
raise Exception("scheme_applied must be one of 'BACKGROUND' or 'LETTER'")
if type(data) == list:
data = {1: data}
lvls = data.keys()
Expand All @@ -43,40 +47,48 @@ def __init__(self, data: Union[List[str], Dict] = None, seq_names: List[str] = N
if font is not None:
letter_data = reduce(lambda x, y: concat([x, y]), map(lambda x: x['letter_data'], data_sp))

if type(bg_col_scheme) is str:
bg_cs_dict = get_col_scheme(bg_col_scheme, seq_type)
elif type(bg_col_scheme) is dict and type(bg_col_scheme['cs']) == DataFrame:
bg_cs_dict = bg_col_scheme
if type(col_scheme) is str:
cs_dict = get_col_scheme(col_scheme, seq_type)
elif type(col_scheme) is dict and type(col_scheme['cs']) == DataFrame:
cs_dict = col_scheme

if bg_col_scheme is not None:
legend_title = bg_cs_dict['name']
colscale_gradient = True if is_numeric_dtype(bg_cs_dict['cs']['group']) else False
if col_scheme is not None:
legend_title = cs_dict['name']
colscale_gradient = True if is_numeric_dtype(cs_dict['cs']['group']) else False
if colscale_gradient:
bg_cs_dict['cs'] = concat([bg_cs_dict['cs'],DataFrame(data={'letter':['-'],'group':[0]})])
colscale_opts = scale_fill_gradient(low=bg_low_col, high=bg_high_col, name=legend_title,
na_value=bg_na_col)
# TODO check contain -, then add -
cs_dict['cs'] = concat([cs_dict['cs'], DataFrame(data={'letter': ['-'], 'group': [0]})])
colscale_opts = scale_fill_gradient(low=low_col, high=high_col, name=legend_title,
na_value=na_col)
else:
bg_cs_dict['cs'] = concat([bg_cs_dict['cs'], DataFrame(data={'letter': ['-'], 'group': ['-'], 'col': ['#FFFFFF']})])
tmp = bg_cs_dict['cs'].drop_duplicates(subset=['group']).dropna(subset=['group'])
cs_dict['cs'] = concat(
[cs_dict['cs'], DataFrame(data={'letter': ['-'], 'group': ['-'], 'col': ['#FFFFFF']})])
tmp = cs_dict['cs'].drop_duplicates(subset=['group']).dropna(subset=['group'])
col_map = {}
for item in map(lambda x, y: {x: y}, tmp['group'], tmp['col']):
col_map.update(item)
colscale_opts = scale_fill_manual(values=col_map, name=legend_title, na_value=bg_na_col)
bg_data = merge(bg_data, bg_cs_dict['cs'], how='left')
self.bg_data = bg_data
colscale_opts = scale_fill_manual(values=col_map, name=legend_title, na_value=na_col)
if scheme_applied == 'BACKGROUND':
bg_data = merge(bg_data, cs_dict['cs'], how='left')
else:
letter_data = merge(letter_data, cs_dict['cs'], how='left')
self.colscale_opts = colscale_opts
else:
self.bg_data = None
if scheme_applied == 'BACKGROUND':
bg_data = None
else:
letter_data = None
self.colscale_opts = None

if font is not None:
letter_data = letter_data.sort_values(by='order').reset_index(drop=True)
letter_data['group_by'] = letter_data.apply(
lambda x: '{}.{}.{}.{}'.format(x['seq_group'], x['letter'], x['position'], x['y_index']), axis=1)
self.letter_data = letter_data
else:
self.letter_data = None
self.scale_x_continuous = scale_x_continuous(breaks=lambda x: range(floor(x[0]), ceil(x[1])), expand=(0,0))
letter_data = None
self.bg_data = bg_data
self.letter_data = letter_data
self.scale_x_continuous = scale_x_continuous(breaks=lambda x: range(floor(x[0]), ceil(x[1])), expand=(0, 0))
self.scale_y_continuous = scale_y_continuous(breaks=None)
if seq_names is not None:
self.scale_y_continuous = scale_y_continuous(breaks=lambda x: [k + 0.5 for k in range(0, int(x[1]))],
Expand Down Expand Up @@ -130,15 +142,27 @@ def to_df_data(x, y_index):
def __radd__(self, gg):
params = []
if self.bg_data is not None:
bg_layer = geom_tile(data=self.bg_data,
mapping=aes(x='x', y='y', width='width', height='height', fill='group'),
color=self.__border_col,
**self.__kwargs)
if self.__scheme_applied == 'BACKGROUND':
bg_layer = geom_tile(data=self.bg_data,
mapping=aes(x='x', y='y', width='width', height='height', fill='group'),
color=self.__border_col,
**self.__kwargs)
else:
bg_layer = geom_tile(data=self.bg_data,
mapping=aes(x='x', y='y', width='width', height='height'),
fill=self.__no_scheme_col,
color=self.__border_col,
**self.__kwargs)
params.append(bg_layer)
params.append(self.colscale_opts)
if self.letter_data is not None:
letter_layer = geom_polygon(data=self.letter_data, mapping=aes(x='x', y='y', group='group_by'),
fill=self.__font_col, **self.__kwargs)
if self.__scheme_applied == 'BACKGROUND':
letter_layer = geom_polygon(data=self.letter_data, mapping=aes(x='x', y='y', group='group_by'),
fill=self.__no_scheme_col, **self.__kwargs)
else:
letter_layer = geom_polygon(data=self.letter_data,
mapping=aes(x='x', y='y', group='group_by', fill='group'),
**self.__kwargs)
params.append(letter_layer)
params.extend([self.scale_x_continuous, self.scale_y_continuous, self.ylab, self.xlab])
gg = gg + params
Expand Down
3 changes: 2 additions & 1 deletion tests/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ def test_geom_alignedSeq():
ggplot() + geom_alignedSeq(data=seqs, font=None) + theme_seq()
ggplot() + geom_alignedSeq(data=seqs_dna['MA0013.1'], font=None) + theme_seq()
ggplot() + geom_alignedSeq(data=seqs) + theme_seq() + coord_fixed()
ggplot() + geom_alignedSeq(data=seqs, scheme_applied='LETTER', no_scheme_col='white') + theme_seq() + coord_fixed()
ggplot() + geom_alignedSeq(data=seqs_dna['MA0013.1']) + theme_seq()
ggplot() + geom_alignedSeq(data=seqs,
seq_names=['test-aaa', 'test-b', 'test-c', 'test-d', 'test-e', 'test-ff']) + theme_seq()
ggplot() + geom_alignedSeq(data=seqs, font_col='black', bg_col_scheme=None) + theme_seq()
ggplot() + geom_alignedSeq(data=seqs, no_scheme_col='black', col_scheme=None) + theme_seq()
8 changes: 6 additions & 2 deletions tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ def test_plot_an_aligned_sequences():


def test_no_background_color():
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font_col='black', bg_col_scheme=None) + theme_seq() + coord_fixed()
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], no_scheme_col='black', col_scheme=None) + theme_seq() + coord_fixed()


def test_no_sequence_letter():
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font=None) + theme_seq() + coord_fixed()
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font=None, border_col=None) + theme_seq() + coord_fixed()


def test_switch_color_schme():
ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], no_scheme_col='white', scheme_applied='LETTER') + theme_seq() + coord_fixed()


def test_tagging_sequences():
Expand Down
2 changes: 1 addition & 1 deletion tests/test_paper.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
O.save('Fig. 2O.png')
P = ggplot() + geom_seqBar(seqs_dna['MA0013.1'], font=None) + ggtitle('P') + theme_seq()# Fig. 2P
P.save('Fig. 2P.png')
Q = ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font_col='black', bg_col_scheme=None) + ggtitle(
Q = ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], no_scheme_col='black', col_scheme=None) + ggtitle(
'Q') + theme_seq() + coord_fixed() # Fig. 2G
Q.save('Fig. 2Q.png')
from plotnine import scale_y_continuous
Expand Down

0 comments on commit fce95f9

Please sign in to comment.