404
+ +Page not found
+ + +diff --git a/.gitignore b/.gitignore index 217140a..acefbf0 100644 --- a/.gitignore +++ b/.gitignore @@ -147,5 +147,4 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/ -site \ No newline at end of file +.idea/ \ No newline at end of file diff --git a/site/404.html b/site/404.html new file mode 100644 index 0000000..cc3f774 --- /dev/null +++ b/site/404.html @@ -0,0 +1,129 @@ + + +
+ + + + +Page not found
+ + +Default color schemes: chemistry, chemistry2, hydrophobicity, nucleotide, nucleotide2, base_pairing, clustalx, taylor.
+from plotnineseqsuite.col_schemes import make_col_scheme
+cs1 = make_col_scheme(chars=['A', 'T', 'C', 'G'], groups=['gr1', 'gr1', 'gr2', 'gr2'],cols=['purple', 'purple', 'blue', 'blue'])
+cs2 = make_col_scheme(chars=['A', 'T', 'C', 'G'], values=[1,2,3,4])
+
+The function is used to create custom color style themes.
+Name of custom scheme. It will display in legend.
+Letters that will be used in the plot.
+Used in a custom discrete color scheme. It groups letters.
+Used in a custom discrete color scheme. It represents the RGB value of the grouped color.
+Used in a custom continuous color scheme. It represents the numeric value of the corresponding letter.
+This function is used to get the built-in color theme of the type of the given sequence.
+from plotnineseqsuite.col_schemes import get_col_scheme
+col_df = get_col_scheme(col_scheme='chemistry')
+
+One of the default color schemes.
+AA or DNA or RNA
+ +Default fonts: times_new_roman, arial, courier_new, akrobat_bold, xkcd_regular, akrobat_regular, helvetica_bold, helvetica_light, helvetica_regular, roboto_bold, roboto_medium, roboto_regular, roboto_slab_bold, roboto_slab_light, roboto_slab_regular.
+Get all fonts.
+from plotnineseqsuite import get_font
+f_df = get_font(font_name='times_new_roman')
+
+Gets the specified font.
+Name of one of the default fonts.
+ +A class that represents the sequence alignment diagram
+from plotnine import ggplot, coord_fixed
+from plotnineseqsuite import geom_alignedSeq, theme_seq
+from plotnineseqsuite.data import seqs_dna
+ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + theme_seq() + coord_fixed()
+
+Sequence data or corresponding dict.
+The name corresponding to the sequence data.
+OTHER, AA, DNA, RNA
+The letter corresponding to the data.
+Font value
+The ratio of the size of letters to the standard unit width.
+The color of the font.
+Color scheme of the backgrounds.
+Continuous color schemes are available.
+Continuous color schemes are available.
+Used when the background in the corresponding namespace do not have a color matching value defined.
+Other arguments passed on to layer().
+ +A class that represents the sequence logo
+from plotnine import ggplot
+from plotnineseqsuite import geom_logo, theme_seq
+from plotnineseqsuite.data import seqs_dna
+ggplot() + geom_logo(seqs_dna['MA0001.1']) + theme_seq()
+
+Sequence data or PFM or corresponding dict.
+bits, probability, custom
+OTHER, AA, DNA, RNA
+The letter corresponding to the data. If the type of data is ndarray, the order of the namespaces must correspond to that of ndarray.
+Font value
+The ratio of the size of letters to the standard unit width.
+Order of letter stack is reversed.
+Color scheme of the letters.
+Continuous color schemes are available.
+Continuous color schemes are available.
+Used when the letters in the corresponding namespace do not have a color matching value defined.
+Other arguments passed on to layer().
+ +A class that represents the sequence histogram
+from plotnine import ggplot
+from plotnineseqsuite import geom_seqBar, theme_seq
+from plotnineseqsuite.data import seqs_dna
+ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + theme_seq()
+
+Sequence data or PFM or corresponding dict.
+OTHER, AA, DNA, RNA
+The letter corresponding to the data. If the type of data is ndarray, the order of the namespaces must correspond to that of ndarray.
+Font value
+The ratio of the size of letters and the width of bars to the standard unit width.
+Color scheme of the cylinder.
+The color of the font.
+Continuous color schemes are available.
+Continuous color schemes are available.
+Used when the letters in the corresponding namespace do not have a color matching value defined.
+Other arguments passed on to layer().
+ +pip install plotnineseqsuite
from plotnineseqsuite.data import seqs_dna, seqs_aa ,pfms_dna
+
+This loads three sample data sets:
+seqs_dna
: dict of binding sites for 12 transcription factors obtained from FASTA files in JASPAR. The keys represent the JASPAR ID.pfms_dna
: dict of position frequency matrices for four transcription factors obtained from JASPAR. The keys names represent the JASPAR ID.seqs_aa
: dict of kinase-substrate phosphorylation sites obtained from Wagih et al. The keys represent the names of the kinases associated with the phosphosites. You can draw aligned sequences using the ggplot function, with geom_alignedSeq. Let’s try this on sequences for one of the transcription factors from JASPAR:
+from plotnine import ggplot, coord_fixed
+from plotnineseqsuite.align import geom_alignedSeq
+from plotnineseqsuite.theme import theme_seq
+ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + theme_seq() + coord_fixed()
+
+
+geom_alignedSeq accepts two types of input, each described in detail below
+When bg_col_scheme option is None, the picture has no background color.
+ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font_col='black', bg_col_scheme=None) + theme_seq() + coord_fixed()
+
+
+When font option is None, the picture has only the background color.
+ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font=None) + theme_seq() + coord_fixed()
+
+
+You can set seq_names parameter to identify the name of the sequence.
+names=['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f']
+ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names) + theme_seq() + coord_fixed()
+
+
+You can draw a sequence logo using the ggplot function, with geom_logo. Let’s try this on sequences for one of the transcription factors from JASPAR:
+from plotnine import ggplot
+from plotnineseqsuite.logo import geom_logo
+from plotnineseqsuite.theme import theme_seq
+ggplot() + geom_logo(seqs_dna['MA0001.1']) + theme_seq()
+
+
+geom_logo accepts three types of input, each described in detail below
+The following generates a sequence logo using a position frequency matrix from the sample data
+ggplot() + geom_logo(pfms_dna['MA0018.2'],seq_type='DNA') + theme_seq()
+
+
+geom_logo supports two sequence logo methods through the method options: ‘bits’ and ‘probability’. By default, the bits is used.
+ggplot() + geom_logo( seqs_dna['MA0001.1'], method = 'bits' ) + theme_seq()
+ggplot() + geom_logo( seqs_dna['MA0001.1'], method = 'probability' ) + theme_seq()
+
++
+If you have your own height metric for each letter, simply create a matrix where each cell is the desired height, and set the method to custom. You can even have negative heights. Here’s a simple example:
+import numpy as np
+custom_mat = np.random.randn(4,5)
+ggplot() + geom_logo(custom_mat, method='custom', seq_type='DNA') + theme_seq()
+
+
+You can draw a conservation bar of aligned sequences using ggplot function, with geom_seqBar. Let’s try this on sequences for one of the transcription factors from JASPAR:
+from plotnineseqsuite.bar import geom_seqBar
+ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + theme_seq()
+
+
+geom_seqBar accepts three types of input, each described in detail below
+The following generates a sequence histogram using a position frequency matrix from the sample data
+ggplot() + geom_seqBar(pfms_dna['MA0018.2'],seq_type='DNA') + theme_seq()
+
+
+When font option is None, the picture has only the bar.
+ggplot() + geom_seqBar(seqs_dna['MA0013.1'], font=None) + theme_seq()
+
+
+Amino acids, DNA and RNA sequence types are all supported by geom_logo, geom_seqBar and geom_alignedSeq. By default, plotnineSeqSuite will try to guess your sequence type. You can explicitly set the sequence type through the seq_type option.
+Let’s try generating an amino acid sequence logo using kinase-substrate phosphorylation data:
+ggplot() + geom_logo( seqs_aa['AKT1'], seq_type = 'AA' ) + theme_seq()
+
+
+If you want to define a custom alphabet you can do so by setting namespace with your desired custom alphabet. For example, let’s say you wanted a sequence logo of the digits 1 to 4:
+from plotnine.guides import guides
+seqs_numeric = list(map(lambda x: x.replace('A','1').replace('T','2').replace('G','3').replace('C','4'), seqs_dna['MA0001.1']))
+ggplot() + geom_logo(seqs_numeric, method='probability', namespace=['1','2','3','4']) + theme_seq()+guides(fill=False)
+
+
+Greek letters are also supported:
seqs_numeric = list(map(lambda x: x.replace('A','δ').replace('T','ε').replace('G','ψ').replace('C','λ'), seqs_dna['MA0001.1']))
+ggplot() + geom_logo(seqs_numeric, method='probability', namespace=['δ','ε','ψ','λ']) + theme_seq()+guides(fill=False)
+
+
+plotnineSeqSuite has preset color schemes that can be set using the col_scheme parameter in geom_logo, the parameter bar_col_scheme in geom_seqBar and the parameter bg_col_scheme in geom_alignedSeq. By default, the col_scheme is set to AUTO such that the color scheme is automatically chosen based on your sequence type.
+Let’s try generating a DNA sequence logo using the base_pairing color scheme:
+ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme='base_pairing') + theme_seq()
+
+
+If the presets are not enough for you, you can define custom discrete or continuous color schemes using the col_schemes.make_col_scheme function. Here are two examples of discrete and continuous color schemes.
+from plotnineseqsuite.col_schemes import make_col_scheme
+cs1 = make_col_scheme(chars=['A', 'T', 'C', 'G'], groups=['gr1', 'gr1', 'gr2', 'gr2'],cols=['purple', 'purple', 'blue', 'blue'])
+ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme=cs1) + theme_seq()
+
+
+cs2 = make_col_scheme(chars=['A', 'T', 'C', 'G'], values=[1,2,3,4])
+ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme=cs2) + theme_seq()
+
+
+You can plot more than one grouped sequences at the same time with the help of facets. plotnineSeqSuite will accept a dict. The keys of the dict will be used as the facet titles. Take geom_logo for example.
+from plotnine import facet_wrap
+ggplot() + geom_logo(seqs_dna)+ theme_seq()+facet_wrap('~seq_group', ncol=4, scales='free_x')
+
+
+You can adjust the font of letters by setting the font parameter. To list all the available fonts use the font.list_fonts function. Take geom_logo for example.
+from plotnine import ggtitle
+import patchworklib as pw
+from plotnineseqsuite.font import list_fonts
+
+fonts = list_fonts()
+for i in range(0,12,3):
+ g1 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i]) + theme_seq()+ggtitle(fonts[i]))
+ g2 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i+1]) + theme_seq() + ggtitle(fonts[i+1]))
+ g3 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i+2]) + theme_seq() + ggtitle(fonts[i+2]))
+ if i == 0:
+ allgg = g1|g2|g3
+ else:
+ temp = g1|g2|g3
+ allgg = temp/allgg
+allgg.savefig()
+
+
+plotnineSeqSuite is based on object-oriented design. The *_data properties of the classes geom_logo, geom_seqBar and geom_alignedSeq are a core feature.
+Without using any packages like patchwork and cowplot, plotnineSeqSuite can easily plot geom_logo and geom_seqBar, geom_alignedSeq in one figure. I’ll demonstrate with an example plotting probability sequence logo, aligned sequences and sequence histogram in one figure via changed *_data property.
+from plotnine import scale_y_continuous
+names = ['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f']
+seqs = geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names)
+logo = geom_logo(seqs_dna['MA0013.1'], method='probability')
+logo.data['y'] = logo.data['y']+6.1
+bar = geom_seqBar(seqs_dna['MA0013.1'], font=None)
+bar.bar_data['y'] = bar.bar_data['y'] - 6.1
+ggplot() + logo + bar + seqs + theme_seq() + scale_y_continuous(breaks=lambda x: [k + 0.5 for k in range(0, len(names))], labels=names)
+
+
+When the input sequence fragment does not start at 1, you can modify the x value of the property to display the correct starting position.
+names = ['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f']
+seqs = geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names)
+seqs.bg_data['x'] = seqs.bg_data['x']+3333
+seqs.letter_data['x'] = seqs.letter_data['x']+3333
+ggplot() + seqs + theme_seq()
+
+
+Because plotnineSeqSuite is an extension of plotnine, functions of plotnine can be used without obstacles. Here is an example of drawing rectangles, lines and text.
+from plotnine.geoms import annotate
+ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + annotate('rect', xmin=0.5, xmax=3.5, ymin=-0.05, ymax=6.1,alpha=.1, color='black') + theme_seq()
+
+
+ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + annotate('segment', x=1.5, xend=2.5, y=0, yend=0, size=2, color='red') + annotate('segment', x=4.5, xend=7.5, y=0, yend=0, size=2, color='red') + annotate('segment', x=8.5, xend=11.5, y=0, yend=0,size=2, color='red') + annotate('text', x=6, y=-0.2,label='A is the most', color='red') + theme_seq()
+
+
+
+ ' + escapeHtml(summary) +'
' + noResultsText + '
'); + } +} + +function doSearch () { + var query = document.getElementById('mkdocs-search-query').value; + if (query.length > min_search_length) { + if (!window.Worker) { + displayResults(search(query)); + } else { + searchWorker.postMessage({query: query}); + } + } else { + // Clear results for short queries + displayResults([]); + } +} + +function initSearch () { + var search_input = document.getElementById('mkdocs-search-query'); + if (search_input) { + search_input.addEventListener("keyup", doSearch); + } + var term = getSearchTermFromLocation(); + if (term) { + search_input.value = term; + doSearch(); + } +} + +function onWorkerMessage (e) { + if (e.data.allowSearch) { + initSearch(); + } else if (e.data.results) { + var results = e.data.results; + displayResults(results); + } else if (e.data.config) { + min_search_length = e.data.config.min_search_length-1; + } +} + +if (!window.Worker) { + console.log('Web Worker API not supported'); + // load index in main thread + $.getScript(joinUrl(base_url, "search/worker.js")).done(function () { + console.log('Loaded worker'); + init(); + window.postMessage = function (msg) { + onWorkerMessage({data: msg}); + }; + }).fail(function (jqxhr, settings, exception) { + console.error('Could not load worker.js'); + }); +} else { + // Wrap search in a web worker + var searchWorker = new Worker(joinUrl(base_url, "search/worker.js")); + searchWorker.postMessage({init: true}); + searchWorker.onmessage = onWorkerMessage; +} diff --git a/site/search/search_index.json b/site/search/search_index.json new file mode 100644 index 0000000..94ea4c2 --- /dev/null +++ b/site/search/search_index.json @@ -0,0 +1 @@ +{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Getting Started Installation pip install plotnineseqsuite Load sample data from plotnineseqsuite.data import seqs_dna, seqs_aa ,pfms_dna This loads three sample data sets: seqs_dna : dict 
of binding sites for 12 transcription factors obtained from FASTA files in JASPAR . The keys represent the JASPAR ID. pfms_dna : dict of position frequency matrices for four transcription factors obtained from JASPAR . The keys names represent the JASPAR ID. seqs_aa : dict of kinase-substrate phosphorylation sites obtained from Wagih et al. The keys represent the names of the kinases associated with the phosphosites. Visualizing aligned sequences Plot an aligned sequences You can draw an aligned sequences using ggplot function, with geom_alignedSeq. Let\u2019s try this on sequences for one of the transcription factors from JASPAR: from plotnine import ggplot, coord_fixed from plotnineseqsuite.align import geom_alignedSeq from plotnineseqsuite.theme import theme_seq ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + theme_seq() + coord_fixed() Accepted input formats geom_alignedSeq accepts two types of input, each described in detail below list: a list of aligned sequences dict: It is used for plotting more than one sequence logo at the same time with the help of facets, the key of dict is facet value, and the value of dict is list described above No background color When bg_col_scheme option is None, the picture has no background color. ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font_col='black', bg_col_scheme=None) + theme_seq() + coord_fixed() No sequence letter When font option is None, the picture has only the background color. ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font=None) + theme_seq() + coord_fixed() Tagging sequences You can set seq_names parameter to identify the name of the sequence. names=['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f'] ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names) + theme_seq() + coord_fixed() Visualizing sequence logo Plot a sequence logo You can draw a sequence logos using ggplot function, with geom_logo. 
Let\u2019s try this on sequences for one of the transcription factors from JASPAR: from plotnine import ggplot from plotnineseqsuite.logo import geom_logo from plotnineseqsuite.theme import theme_seq ggplot() + geom_logo(seqs_dna['MA0001.1']) + theme_seq() Accepted input formats geom_logo accepts three types of input, each described in detail below list: a list of aligned sequences numpy.ndarray: a position frequency matrix, where the row is the letter, and column is the position. Note: The order of the rows corresponds to the NAMESPACE one by one. dict: It is used for plotting more than one sequence logo at the same time with the help of facets, the key of dict is facet value, and the value of dict is list or numpy.ndarray described above The following generates a sequence logo using a position frequency matrix from the sample data ggplot() + geom_logo(pfms_dna['MA0018.2'],seq_type='DNA') + theme_seq() Plotting methods geom_logo supports two sequence logo methods through the method options: \u2018bits\u2019 and \u2018probability\u2019. By default, the bits is used. ggplot() + geom_logo( seqs_dna['MA0001.1'], method = 'bits' ) + theme_seq() ggplot() + geom_logo( seqs_dna['MA0001.1'], method = 'probability' ) + theme_seq() Custom-height logos If you have your own height metric for each letter, simply create a matrix where each cell is a the desired height, and set the method to custom. You can even have negative heights. Here\u2019s a simple example: import numpy as np custom_mat = np.random.randn(4,5) ggplot() + geom_logo(custom_mat, method='custom', seq_type='DNA') + theme_seq() Visualizing sequence histogram Plot a sequence histogram You can draw a conservation bar of aligned sequences using ggplot function, with geom_seqBar. 
Let\u2019s try this on sequences for one of the transcription factors from JASPAR: from plotnineseqsuite.bar import geom_seqBar ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + theme_seq() Accepted input formats geom_seqBar accepts three types of input, each described in detail below list: a list of aligned sequences numpy.ndarray: a position frequency matrix, where the row is the letter, and column is the position. Note: The order of the rows corresponds to the NAMESPACE one by one. dict: It is used for plotting more than one sequence histogram at the same time with the help of facets, the key of dict is facet value, and the value of dict is list or numpy.ndarray described above The following generates a sequence histogram using a position frequency matrix from the sample data ggplot() + geom_seqBar(pfms_dna['MA0018.2'],seq_type='DNA') + theme_seq() No sequence letter When font option is None, the picture has only the bar. ggplot() + geom_seqBar(seqs_dna['MA0013.1'], font=None) + theme_seq() Sequence types Preset alphabets Amino acids, DNA and RNA sequence types are all supported by geom_logo, geom_seqBar and geom_alignedSeq. By default, plotnineSeqSuite will try to guess your sequence type. You can explicitly set the sequence type through the seq_type option. Lets try generate an amino acid sequence logo using kinase-substrate phosphorylation data: ggplot() + geom_logo( seqs_aa['AKT1'], seq_type = 'AA' ) + theme_seq() Custom alphabet If you want to define a custom alphabet you can do so by setting namespace with your desired custom alphabet. 
For example, lets say you wanted a sequence logo of zeros and ones: from plotnine.guides import guides seqs_numeric = list(map(lambda x: x.replace('A','1').replace('T','2').replace('G','3').replace('C','4'), seqs_dna['MA0001.1'])) ggplot() + geom_logo(seqs_numeric, method='probability', namespace=['1','2','3','4']) + theme_seq()+guides(fill=False) Greek letters are also supported: seqs_numeric = list(map(lambda x: x.replace('A','\u03b4').replace('T','\u03b5').replace('G','\u03c8').replace('C','\u03bb'), seqs_dna['MA0001.1'])) ggplot() + geom_logo(seqs_numeric, method='probability', namespace=['\u03b4','\u03b5','\u03c8','\u03bb']) + theme_seq()+guides(fill=False) Colour schemes Preset color schemes plotnineSeqSuite has preset color schemes that can be set using the col_scheme parameter in geom_logo, the parameter bar_col_scheme in geom_seqBar and the parameter bg_col_scheme in geom_alignedSeq. By default, the col_scheme is set to AUTO such that the color scheme is automatically chosen based on your sequence type. Lets try generate an amino acid sequence logo using kinase-substrate phosphorylation data: ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme='base_pairing') + theme_seq() Custom color schemes If the presets are not enough for you, you can define custom discrete or continuous color schemes using the col_schemes.make_col_scheme function. Here are two examples of discrete and continuous color schemes. Discrete color schemes from plotnineseqsuite.col_schemes import make_col_scheme cs1 = make_col_scheme(chars=['A', 'T', 'C', 'G'], groups=['gr1', 'gr1', 'gr2', 'gr2'],cols=['purple', 'purple', 'blue', 'blue']) ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme=cs1) + theme_seq() Continuous color schemes cs2 = make_col_scheme(chars=['A', 'T', 'C', 'G'], values=[1,2,3,4]) ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme=cs2) + theme_seq() Multiple grouped sequences You can plot more than one grouped sequences at the same time with the help of facets. 
plotnineSeqSuite will accept a dict. The keys of the dict will be used as the facet titles. Take geom_logo for example. from plotnine import facet_wrap ggplot() + geom_logo(seqs_dna)+ theme_seq()+facet_wrap('~seq_group', ncol=4, scales='free_x') Fonts You can adjust the font of letters by setting the font parameter. To list all the available color schemes use the font.list_fonts function. Take geom_logo for example. from plotnine import ggtitle import patchworklib as pw from plotnineseqsuite.font import list_fonts fonts = list_fonts() for i in range(0,12,3): g1 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i]) + theme_seq()+ggtitle(fonts[i])) g2 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i+1]) + theme_seq() + ggtitle(fonts[i+1])) g3 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i+2]) + theme_seq() + ggtitle(fonts[i+2])) if i == 0: allgg = g1|g2|g3 else: temp = g1|g2|g3 allgg = temp/allgg allgg.savefig() Advanced plotnineSeqSuite plotnineSeqSuite is based on object-oriented design. The *_data property of class geom_logo and geom_seqBar, geom_alignedSeq is a core feature. Combining plots Without using any packages like patchwork and cowplot, plotnineSeqSuite can easily plot geom_logo and geom_seqBar, geom_alignedSeq in one figure. I\u2019ll demonstrate with an example plotting probability sequence logo, aligned sequences and sequence histogram in one figure via changed *_data property. 
from plotnine import scale_y_continuous names = ['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f'] seqs = geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names) logo = geom_logo(seqs_dna['MA0013.1'], method='probability') logo.data['y'] = logo.data['y']+6.1 bar = geom_seqBar(seqs_dna['MA0013.1'], font=None) bar.bar_data['y'] = bar.bar_data['y'] - 6.1 ggplot() + logo + bar + seqs + theme_seq() + scale_y_continuous(breaks=lambda x: [k + 0.5 for k in range(0, len(names))], labels=names) Modify the starting position When the input sequence fragment does not start at 1, you can modify the x value of the property to display the correct starting position. names = ['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f'] seqs = geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names) seqs.bg_data['x'] = seqs.bg_data['x']+3333 seqs.letter_data['x'] = seqs.letter_data['x']+3333 ggplot() + seqs + theme_seq() Used concurrently with other functions of plotnine. Because plotnineSeqSuite is an extension of plotnine, functions of plotnine can be used without obstacles. Here is an example of drawing rectangles, lines and text. 
from plotnine.geoms import annotate ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + annotate('rect', xmin=0.5, xmax=3.5, ymin=-0.05, ymax=6.1,alpha=.1, color='black') + theme_seq() ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + annotate('segment', x=1.5, xend=2.5, y=0, yend=0, size=2, color='red') + annotate('segment', x=4.5, xend=7.5, y=0, yend=0, size=2, color='red') + annotate('segment', x=8.5, xend=11.5, y=0, yend=0,size=2, color='red') + annotate('text', x=6, y=-0.2,label='A is the most', color='red') + theme_seq()","title":"Getting Started"},{"location":"#getting-started","text":"","title":"Getting Started"},{"location":"#installation","text":"pip install plotnineseqsuite","title":"Installation"},{"location":"#load-sample-data","text":"from plotnineseqsuite.data import seqs_dna, seqs_aa ,pfms_dna This loads three sample data sets: seqs_dna : dict of binding sites for 12 transcription factors obtained from FASTA files in JASPAR . The keys represent the JASPAR ID. pfms_dna : dict of position frequency matrices for four transcription factors obtained from JASPAR . The keys names represent the JASPAR ID. seqs_aa : dict of kinase-substrate phosphorylation sites obtained from Wagih et al. The keys represent the names of the kinases associated with the phosphosites.","title":"Load sample data"},{"location":"#visualizing-aligned-sequences","text":"","title":"Visualizing aligned sequences"},{"location":"#plot-an-aligned-sequences","text":"You can draw an aligned sequences using ggplot function, with geom_alignedSeq. 
Let\u2019s try this on sequences for one of the transcription factors from JASPAR: from plotnine import ggplot, coord_fixed from plotnineseqsuite.align import geom_alignedSeq from plotnineseqsuite.theme import theme_seq ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + theme_seq() + coord_fixed()","title":"Plot an aligned sequences"},{"location":"#accepted-input-formats","text":"geom_alignedSeq accepts two types of input, each described in detail below list: a list of aligned sequences dict: It is used for plotting more than one sequence logo at the same time with the help of facets, the key of dict is facet value, and the value of dict is list described above","title":"Accepted input formats"},{"location":"#no-background-color","text":"When bg_col_scheme option is None, the picture has no background color. ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font_col='black', bg_col_scheme=None) + theme_seq() + coord_fixed()","title":"No background color"},{"location":"#no-sequence-letter","text":"When font option is None, the picture has only the background color. ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], font=None) + theme_seq() + coord_fixed()","title":"No sequence letter"},{"location":"#tagging-sequences","text":"You can set seq_names parameter to identify the name of the sequence. names=['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f'] ggplot() + geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names) + theme_seq() + coord_fixed()","title":"Tagging sequences"},{"location":"#visualizing-sequence-logo","text":"","title":"Visualizing sequence logo"},{"location":"#plot-a-sequence-logo","text":"You can draw a sequence logos using ggplot function, with geom_logo. 
Let\u2019s try this on sequences for one of the transcription factors from JASPAR: from plotnine import ggplot from plotnineseqsuite.logo import geom_logo from plotnineseqsuite.theme import theme_seq ggplot() + geom_logo(seqs_dna['MA0001.1']) + theme_seq()","title":"Plot a sequence logo"},{"location":"#accepted-input-formats_1","text":"geom_logo accepts three types of input, each described in detail below list: a list of aligned sequences numpy.ndarray: a position frequency matrix, where the row is the letter, and column is the position. Note: The order of the rows corresponds to the NAMESPACE one by one. dict: It is used for plotting more than one sequence logo at the same time with the help of facets, the key of dict is facet value, and the value of dict is list or numpy.ndarray described above The following generates a sequence logo using a position frequency matrix from the sample data ggplot() + geom_logo(pfms_dna['MA0018.2'],seq_type='DNA') + theme_seq()","title":"Accepted input formats"},{"location":"#plotting-methods","text":"geom_logo supports two sequence logo methods through the method options: \u2018bits\u2019 and \u2018probability\u2019. By default, the bits is used. ggplot() + geom_logo( seqs_dna['MA0001.1'], method = 'bits' ) + theme_seq() ggplot() + geom_logo( seqs_dna['MA0001.1'], method = 'probability' ) + theme_seq()","title":"Plotting methods"},{"location":"#custom-height-logos","text":"If you have your own height metric for each letter, simply create a matrix where each cell is a the desired height, and set the method to custom. You can even have negative heights. 
Here\u2019s a simple example: import numpy as np custom_mat = np.random.randn(4,5) ggplot() + geom_logo(custom_mat, method='custom', seq_type='DNA') + theme_seq()","title":"Custom-height logos"},{"location":"#visualizing-sequence-histogram","text":"","title":"Visualizing sequence histogram"},{"location":"#plot-a-sequence-histogram","text":"You can draw a conservation bar of aligned sequences using ggplot function, with geom_seqBar. Let\u2019s try this on sequences for one of the transcription factors from JASPAR: from plotnineseqsuite.bar import geom_seqBar ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + theme_seq()","title":"Plot a sequence histogram"},{"location":"#accepted-input-formats_2","text":"geom_seqBar accepts three types of input, each described in detail below list: a list of aligned sequences numpy.ndarray: a position frequency matrix, where the row is the letter, and column is the position. Note: The order of the rows corresponds to the NAMESPACE one by one. dict: It is used for plotting more than one sequence histogram at the same time with the help of facets, the key of dict is facet value, and the value of dict is list or numpy.ndarray described above The following generates a sequence histogram using a position frequency matrix from the sample data ggplot() + geom_seqBar(pfms_dna['MA0018.2'],seq_type='DNA') + theme_seq()","title":"Accepted input formats"},{"location":"#no-sequence-letter_1","text":"When font option is None, the picture has only the bar. ggplot() + geom_seqBar(seqs_dna['MA0013.1'], font=None) + theme_seq()","title":"No sequence letter"},{"location":"#sequence-types","text":"","title":"Sequence types"},{"location":"#preset-alphabets","text":"Amino acids, DNA and RNA sequence types are all supported by geom_logo, geom_seqBar and geom_alignedSeq. By default, plotnineSeqSuite will try to guess your sequence type. You can explicitly set the sequence type through the seq_type option. 
Lets try generate an amino acid sequence logo using kinase-substrate phosphorylation data: ggplot() + geom_logo( seqs_aa['AKT1'], seq_type = 'AA' ) + theme_seq()","title":"Preset alphabets"},{"location":"#custom-alphabet","text":"If you want to define a custom alphabet you can do so by setting namespace with your desired custom alphabet. For example, lets say you wanted a sequence logo of zeros and ones: from plotnine.guides import guides seqs_numeric = list(map(lambda x: x.replace('A','1').replace('T','2').replace('G','3').replace('C','4'), seqs_dna['MA0001.1'])) ggplot() + geom_logo(seqs_numeric, method='probability', namespace=['1','2','3','4']) + theme_seq()+guides(fill=False) Greek letters are also supported: seqs_numeric = list(map(lambda x: x.replace('A','\u03b4').replace('T','\u03b5').replace('G','\u03c8').replace('C','\u03bb'), seqs_dna['MA0001.1'])) ggplot() + geom_logo(seqs_numeric, method='probability', namespace=['\u03b4','\u03b5','\u03c8','\u03bb']) + theme_seq()+guides(fill=False)","title":"Custom alphabet"},{"location":"#colour-schemes","text":"","title":"Colour schemes"},{"location":"#preset-color-schemes","text":"plotnineSeqSuite has preset color schemes that can be set using the col_scheme parameter in geom_logo, the parameter bar_col_scheme in geom_seqBar and the parameter bg_col_scheme in geom_alignedSeq. By default, the col_scheme is set to AUTO such that the color scheme is automatically chosen based on your sequence type. Lets try generate an amino acid sequence logo using kinase-substrate phosphorylation data: ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme='base_pairing') + theme_seq()","title":"Preset color schemes"},{"location":"#custom-color-schemes","text":"If the presets are not enough for you, you can define custom discrete or continuous color schemes using the col_schemes.make_col_scheme function. 
Here are two examples of discrete and continuous color schemes.","title":"Custom color schemes"},{"location":"#discrete-color-schemes","text":"from plotnineseqsuite.col_schemes import make_col_scheme cs1 = make_col_scheme(chars=['A', 'T', 'C', 'G'], groups=['gr1', 'gr1', 'gr2', 'gr2'],cols=['purple', 'purple', 'blue', 'blue']) ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme=cs1) + theme_seq()","title":"Discrete color schemes"},{"location":"#continuous-color-schemes","text":"cs2 = make_col_scheme(chars=['A', 'T', 'C', 'G'], values=[1,2,3,4]) ggplot() + geom_logo(seqs_dna['MA0001.1'], col_scheme=cs2) + theme_seq()","title":"Continuous color schemes"},{"location":"#multiple-grouped-sequences","text":"You can plot more than one grouped sequences at the same time with the help of facets. plotnineSeqSuite will accept a dict. The keys of the dict will be used as the facet titles. Take geom_logo for example. from plotnine import facet_wrap ggplot() + geom_logo(seqs_dna)+ theme_seq()+facet_wrap('~seq_group', ncol=4, scales='free_x')","title":"Multiple grouped sequences"},{"location":"#fonts","text":"You can adjust the font of letters by setting the font parameter. To list all the available color schemes use the font.list_fonts function. Take geom_logo for example. from plotnine import ggtitle import patchworklib as pw from plotnineseqsuite.font import list_fonts fonts = list_fonts() for i in range(0,12,3): g1 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i]) + theme_seq()+ggtitle(fonts[i])) g2 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i+1]) + theme_seq() + ggtitle(fonts[i+1])) g3 = pw.load_ggplot(ggplot() + geom_logo(data=seqs_dna['MA0001.1'], font=fonts[i+2]) + theme_seq() + ggtitle(fonts[i+2])) if i == 0: allgg = g1|g2|g3 else: temp = g1|g2|g3 allgg = temp/allgg allgg.savefig()","title":"Fonts"},{"location":"#advanced-plotnineseqsuite","text":"plotnineSeqSuite is based on object-oriented design. 
The *_data property of class geom_logo and geom_seqBar, geom_alignedSeq is a core feature.","title":"Advanced plotnineSeqSuite"},{"location":"#combining-plots","text":"Without using any packages like patchwork and cowplot, plotnineSeqSuite can easily plot geom_logo and geom_seqBar, geom_alignedSeq in one figure. I\u2019ll demonstrate with an example plotting probability sequence logo, aligned sequences and sequence histogram in one figure via changed *_data property. from plotnine import scale_y_continuous names = ['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f'] seqs = geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names) logo = geom_logo(seqs_dna['MA0013.1'], method='probability') logo.data['y'] = logo.data['y']+6.1 bar = geom_seqBar(seqs_dna['MA0013.1'], font=None) bar.bar_data['y'] = bar.bar_data['y'] - 6.1 ggplot() + logo + bar + seqs + theme_seq() + scale_y_continuous(breaks=lambda x: [k + 0.5 for k in range(0, len(names))], labels=names)","title":"Combining plots"},{"location":"#modify-the-starting-position","text":"When the input sequence fragment does not start at 1, you can modify the x value of the property to display the correct starting position. names = ['seq-a', 'seq-b', 'seq-c', 'seq-d', 'seq-e', 'seq-f'] seqs = geom_alignedSeq(seqs_dna['MA0013.1'], seq_names=names) seqs.bg_data['x'] = seqs.bg_data['x']+3333 seqs.letter_data['x'] = seqs.letter_data['x']+3333 ggplot() + seqs + theme_seq()","title":"Modify the starting position"},{"location":"#used-concurrently-with-other-functions-of-plotnine","text":"Because plotnineSeqSuite is an extension of plotnine, functions of plotnine can be used without obstacles. Here is an example of drawing rectangles, lines and text. 
from plotnine.geoms import annotate ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + annotate('rect', xmin=0.5, xmax=3.5, ymin=-0.05, ymax=6.1,alpha=.1, color='black') + theme_seq() ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + annotate('segment', x=1.5, xend=2.5, y=0, yend=0, size=2, color='red') + annotate('segment', x=4.5, xend=7.5, y=0, yend=0, size=2, color='red') + annotate('segment', x=8.5, xend=11.5, y=0, yend=0,size=2, color='red') + annotate('text', x=6, y=-0.2,label='A is the most', color='red') + theme_seq()","title":"Used concurrently with other functions of plotnine."},{"location":"col_schemes/","text":"color schemes Default color schemes: chemistry, chemistry2, hydrophobicity, nucleotide, nucleotide2, base_pairing, clustalx, taylor. function make_col_scheme(name: str = 'Custom Scheme', chars: Optional[list[str]] = None, groups: Optional[list[int]] = None, cols: Optional[list[int]] = None, values: Optional[list[int]] = None) -> dict from plotnineseqsuite.col_schemes import make_col_scheme cs1 = make_col_scheme(chars=['A', 'T', 'C', 'G'], groups=['gr1', 'gr1', 'gr2', 'gr2'],cols=['purple', 'purple', 'blue', 'blue']) cs2 = make_col_scheme(chars=['A', 'T', 'C', 'G'], values=[1,2,3,4]) The function is used to create custom color style themes. name Name of custom scheme. It will display in legend. chars Letters will used to plot. groups Used in a custom discrete color scheme. It groups letters. cols Used in a custom discrete color scheme. It represents the RGB value of the grouped color. values Used in a custom continuous color scheme. It represents the numeric value of the corresponding letter. function get_col_scheme(col_scheme: str, seq_type: str = 'AUTO') -> dict This function is used to get the built-in color theme of the type of the given sequence. from plotnineseqsuite.col_schemes import get_col_scheme col_df = get_col_scheme(col_scheme='chemistry') col_scheme One of the default color schemes. 
seq_type AA or DNA or RNA","title":"color schemes"},{"location":"col_schemes/#color-schemes","text":"Default color schemes: chemistry, chemistry2, hydrophobicity, nucleotide, nucleotide2, base_pairing, clustalx, taylor.","title":"color schemes"},{"location":"col_schemes/#function-make_col_schemename-str-custom-scheme-chars-optionalliststr-none-groups-optionallistint-none-cols-optionallistint-none-values-optionallistint-none-dict","text":"from plotnineseqsuite.col_schemes import make_col_scheme cs1 = make_col_scheme(chars=['A', 'T', 'C', 'G'], groups=['gr1', 'gr1', 'gr2', 'gr2'],cols=['purple', 'purple', 'blue', 'blue']) cs2 = make_col_scheme(chars=['A', 'T', 'C', 'G'], values=[1,2,3,4]) The function is used to create custom color style themes.","title":"function make_col_scheme(name: str = 'Custom Scheme', chars: Optional[list[str]] = None, groups: Optional[list[int]] = None, cols: Optional[list[int]] = None, values: Optional[list[int]] = None) -> dict"},{"location":"col_schemes/#name","text":"Name of custom scheme. It will display in legend.","title":"name"},{"location":"col_schemes/#chars","text":"Letters will used to plot.","title":"chars"},{"location":"col_schemes/#groups","text":"Used in a custom discrete color scheme. It groups letters.","title":"groups"},{"location":"col_schemes/#cols","text":"Used in a custom discrete color scheme. It represents the RGB value of the grouped color.","title":"cols"},{"location":"col_schemes/#values","text":"Used in a custom continuous color scheme. It represents the numeric value of the corresponding letter.","title":"values"},{"location":"col_schemes/#function-get_col_schemecol_scheme-str-seq_type-str-auto-dict","text":"This function is used to get the built-in color theme of the type of the given sequence. 
from plotnineseqsuite.col_schemes import get_col_scheme col_df = get_col_scheme(col_scheme='chemistry')","title":"function get_col_scheme(col_scheme: str, seq_type: str = 'AUTO') -> dict"},{"location":"col_schemes/#col_scheme","text":"One of the default color schemes.","title":"col_scheme"},{"location":"col_schemes/#seq_type","text":"AA or DNA or RNA","title":"seq_type"},{"location":"font/","text":"font Default fonts: times_new_roman, arial, courier_new, akrobat_bold, xkcd_regular, akrobat_regular, helvetica_bold, helvetica_light, helvetica_regular, roboto_bold, roboto_medium, roboto_regular, roboto_slab_bold, roboto_slab_light, roboto_slab_regular. function list_fonts() Get all fonts. function get_font(font_name: str) -> DataFrame from plotnineseqsuite import get_font f_df = get_font(font_name='times_new_roman') Gets the specified font. font_name Name of one of the default fonts.","title":"font"},{"location":"font/#font","text":"Default fonts: times_new_roman, arial, courier_new, akrobat_bold, xkcd_regular, akrobat_regular, helvetica_bold, helvetica_light, helvetica_regular, roboto_bold, roboto_medium, roboto_regular, roboto_slab_bold, roboto_slab_light, roboto_slab_regular.","title":"font"},{"location":"font/#function-list_fonts","text":"Get all fonts.","title":"function list_fonts()"},{"location":"font/#function-get_fontfont_name-str-dataframe","text":"from plotnineseqsuite import get_font f_df = get_font(font_name='times_new_roman') Gets the specified font.","title":"function get_font(font_name: str) -> DataFrame"},{"location":"font/#font_name","text":"Name of one of the default fonts.","title":"font_name"},{"location":"geom_alignedSeq/","text":"geom_alignedSeq A class that represents the sequence alignment diagram class geom_alignedSeq(self,data: Union[list[str], dict, None] = None,seq_names: Optional[list[str]] = None,seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.75,font_col: str = 
'#FFFFFF',bg_col_scheme: Union[DataFrame, str] = 'AUTO',bg_low_col: str = 'black',bg_high_col: str = 'yellow',bg_na_col: str = '#333333',**kwargs: Any) from plotnine import ggplot, coord_fixed from plotnineseqsuite import geom_alignedSeq, theme_seq from plotnineseqsuite.data import seqs_dna ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + theme_seq() + coord_fixed() data Sequence data or corresponding dict. seq_names The name corresponding to the sequence data. seq_type OTHER, AA, DNA, RNA namespace The letter corresponding to the data. font Font value stack_width The ratio of the size of letters to the standard unit width. font_col The color of the font. bg_col_scheme Color scheme of the backgrounds. bg_low_col Continuous color schemes are available. bg_high_col Continuous color schemes are available. bg_na_col Used when the background in the corresponding namespace do not have a color matching value defined. kwargs Other arguments passed on to layer().","title":"geom_alignedSeq"},{"location":"geom_alignedSeq/#geom_alignedseq","text":"A class that represents the sequence alignment diagram","title":"geom_alignedSeq"},{"location":"geom_alignedSeq/#class-geom_alignedseqselfdata-unionliststr-dict-none-noneseq_names-optionalliststr-noneseq_type-str-autonamespace-optionalliststr-nonefont-str-roboto_mediumstack_width-float-075font_col-str-ffffffbg_col_scheme-uniondataframe-str-autobg_low_col-str-blackbg_high_col-str-yellowbg_na_col-str-333333kwargs-any","text":"from plotnine import ggplot, coord_fixed from plotnineseqsuite import geom_alignedSeq, theme_seq from plotnineseqsuite.data import seqs_dna ggplot() + geom_alignedSeq(seqs_dna['MA0013.1']) + theme_seq() + coord_fixed()","title":"class geom_alignedSeq(self,data: Union[list[str], dict, None] = None,seq_names: Optional[list[str]] = None,seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.75,font_col: str = '#FFFFFF',bg_col_scheme: Union[DataFrame, str] = 
'AUTO',bg_low_col: str = 'black',bg_high_col: str = 'yellow',bg_na_col: str = '#333333',**kwargs: Any)"},{"location":"geom_alignedSeq/#data","text":"Sequence data or corresponding dict.","title":"data"},{"location":"geom_alignedSeq/#seq_names","text":"The name corresponding to the sequence data.","title":"seq_names"},{"location":"geom_alignedSeq/#seq_type","text":"OTHER, AA, DNA, RNA","title":"seq_type"},{"location":"geom_alignedSeq/#namespace","text":"The letter corresponding to the data.","title":"namespace"},{"location":"geom_alignedSeq/#font","text":"Font value","title":"font"},{"location":"geom_alignedSeq/#stack_width","text":"The ratio of the size of letters to the standard unit width.","title":"stack_width"},{"location":"geom_alignedSeq/#font_col","text":"The color of the font.","title":"font_col"},{"location":"geom_alignedSeq/#bg_col_scheme","text":"Color scheme of the backgrounds.","title":"bg_col_scheme"},{"location":"geom_alignedSeq/#bg_low_col","text":"Continuous color schemes are available.","title":"bg_low_col"},{"location":"geom_alignedSeq/#bg_high_col","text":"Continuous color schemes are available.","title":"bg_high_col"},{"location":"geom_alignedSeq/#bg_na_col","text":"Used when the background in the corresponding namespace do not have a color matching value defined.","title":"bg_na_col"},{"location":"geom_alignedSeq/#kwargs","text":"Other arguments passed on to layer().","title":"kwargs"},{"location":"geom_logo/","text":"geom_logo A class that represents the sequence logo class geom_logo(self,data: Union[list[str], ndarray, dict, None] = None,method: str = 'bits',seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.95,rev_stack_order: bool = False,col_scheme: Union[DataFrame, str] = 'AUTO',low_col: str = 'black',high_col: str = 'yellow',na_col: str = '#333333',**kwargs: Any) from plotnine import ggplot from plotnineseqsuite import geom_logo, theme_seq from plotnineseqsuite.data import 
seqs_dna ggplot() + geom_logo(seqs_dna['MA0001.1']) + theme_seq() data Sequence data or PFM or corresponding dict. method bits, probability, custom seq_type OTHER, AA, DNA, RNA namespace The letter corresponding to the data. If the type of data is ndarray, the order of the namespaces must correspond to that of ndarray. font Font value stack_width The ratio of the size of letters to the standard unit width. rev_stack_order Order of letter stack is reversed. col_scheme Color scheme of the letters. low_col Continuous color schemes are available. high_col Continuous color schemes are available. na_col Used when the letters in the corresponding namespace do not have a color matching value defined. kwargs Other arguments passed on to layer().","title":"geom_logo"},{"location":"geom_logo/#geom_logo","text":"A class that represents the sequence logo","title":"geom_logo"},{"location":"geom_logo/#class-geom_logoselfdata-unionliststr-ndarray-dict-none-nonemethod-str-bitsseq_type-str-autonamespace-optionalliststr-nonefont-str-roboto_mediumstack_width-float-095rev_stack_order-bool-falsecol_scheme-uniondataframe-str-autolow_col-str-blackhigh_col-str-yellowna_col-str-333333kwargs-any","text":"from plotnine import ggplot from plotnineseqsuite import geom_logo, theme_seq from plotnineseqsuite.data import seqs_dna ggplot() + geom_logo(seqs_dna['MA0001.1']) + theme_seq()","title":"class geom_logo(self,data: Union[list[str], ndarray, dict, None] = None,method: str = 'bits',seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.95,rev_stack_order: bool = False,col_scheme: Union[DataFrame, str] = 'AUTO',low_col: str = 'black',high_col: str = 'yellow',na_col: str = '#333333',**kwargs: Any)"},{"location":"geom_logo/#data","text":"Sequence data or PFM or corresponding dict.","title":"data"},{"location":"geom_logo/#method","text":"bits, probability, custom","title":"method"},{"location":"geom_logo/#seq_type","text":"OTHER, AA, DNA, 
RNA","title":"seq_type"},{"location":"geom_logo/#namespace","text":"The letter corresponding to the data. If the type of data is ndarray, the order of the namespaces must correspond to that of ndarray.","title":"namespace"},{"location":"geom_logo/#font","text":"Font value","title":"font"},{"location":"geom_logo/#stack_width","text":"The ratio of the size of letters to the standard unit width.","title":"stack_width"},{"location":"geom_logo/#rev_stack_order","text":"Order of letter stack is reversed.","title":"rev_stack_order"},{"location":"geom_logo/#col_scheme","text":"Color scheme of the letters.","title":"col_scheme"},{"location":"geom_logo/#low_col","text":"Continuous color schemes are available.","title":"low_col"},{"location":"geom_logo/#high_col","text":"Continuous color schemes are available.","title":"high_col"},{"location":"geom_logo/#na_col","text":"Used when the letters in the corresponding namespace do not have a color matching value defined.","title":"na_col"},{"location":"geom_logo/#kwargs","text":"Other arguments passed on to layer().","title":"kwargs"},{"location":"geom_seqBar/","text":"geom_seqBar A class that represents the sequence histogram class geom_seqBar(self,data: Union[list[str], ndarray, dict, None] = None,seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.75,bar_col_scheme: Union[DataFrame, str] = 'AUTO',font_col: str = '#808080',low_col: str = 'black',high_col: str = 'yellow',na_col: str = '#333333',**kwargs: Any) from plotnine import ggplot from plotnineseqsuite import geom_seqBar, theme_seq from plotnineseqsuite.data import seqs_dna ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + theme_seq() data Sequence data or PFM or corresponding dict. seq_type OTHER, AA, DNA, RNA namespace The letter corresponding to the data. If the type of data is ndarray, the order of the namespaces must correspond to that of ndarray. 
font Font value stack_width The ratio of the size of letters and the width of bars to the standard unit width. bar_col_scheme Color scheme of the cylinder. font_col The color of the font. low_col Continuous color schemes are available. high_col Continuous color schemes are available. na_col Used when the letters in the corresponding namespace do not have a color matching value defined. kwargs Other arguments passed on to layer().","title":"geom_seqBar"},{"location":"geom_seqBar/#geom_seqbar","text":"A class that represents the sequence histogram","title":"geom_seqBar"},{"location":"geom_seqBar/#class-geom_seqbarselfdata-unionliststr-ndarray-dict-none-noneseq_type-str-autonamespace-optionalliststr-nonefont-str-roboto_mediumstack_width-float-075bar_col_scheme-uniondataframe-str-autofont_col-str-808080low_col-str-blackhigh_col-str-yellowna_col-str-333333kwargs-any","text":"from plotnine import ggplot from plotnineseqsuite import geom_seqBar, theme_seq from plotnineseqsuite.data import seqs_dna ggplot() + geom_seqBar(seqs_dna['MA0013.1']) + theme_seq()","title":"class geom_seqBar(self,data: Union[list[str], ndarray, dict, None] = None,seq_type: str = 'AUTO',namespace: Optional[list[str]] = None,font: str = 'roboto_medium',stack_width: float = 0.75,bar_col_scheme: Union[DataFrame, str] = 'AUTO',font_col: str = '#808080',low_col: str = 'black',high_col: str = 'yellow',na_col: str = '#333333',**kwargs: Any)"},{"location":"geom_seqBar/#data","text":"Sequence data or PFM or corresponding dict.","title":"data"},{"location":"geom_seqBar/#seq_type","text":"OTHER, AA, DNA, RNA","title":"seq_type"},{"location":"geom_seqBar/#namespace","text":"The letter corresponding to the data. 
If the type of data is ndarray, the order of the namespaces must correspond to that of ndarray.","title":"namespace"},{"location":"geom_seqBar/#font","text":"Font value","title":"font"},{"location":"geom_seqBar/#stack_width","text":"The ratio of the size of letters and the width of bars to the standard unit width.","title":"stack_width"},{"location":"geom_seqBar/#bar_col_scheme","text":"Color scheme of the cylinder.","title":"bar_col_scheme"},{"location":"geom_seqBar/#font_col","text":"The color of the font.","title":"font_col"},{"location":"geom_seqBar/#low_col","text":"Continuous color schemes are available.","title":"low_col"},{"location":"geom_seqBar/#high_col","text":"Continuous color schemes are available.","title":"high_col"},{"location":"geom_seqBar/#na_col","text":"Used when the letters in the corresponding namespace do not have a color matching value defined.","title":"na_col"},{"location":"geom_seqBar/#kwargs","text":"Other arguments passed on to layer().","title":"kwargs"},{"location":"theme/","text":"theme function theme_seq() -> theme Default theme. Plain white background.","title":"theme"},{"location":"theme/#theme","text":"","title":"theme"},{"location":"theme/#function-theme_seq-theme","text":"Default theme. Plain white background.","title":"function theme_seq() -> theme"}]} \ No newline at end of file diff --git a/site/search/worker.js b/site/search/worker.js new file mode 100644 index 0000000..8628dbc --- /dev/null +++ b/site/search/worker.js @@ -0,0 +1,133 @@ +var base_path = 'function' === typeof importScripts ? '.' 
: '/search/'; +var allowSearch = false; +var index; +var documents = {}; +var lang = ['en']; +var data; + +function getScript(script, callback) { + console.log('Loading script: ' + script); + $.getScript(base_path + script).done(function () { + callback(); + }).fail(function (jqxhr, settings, exception) { + console.log('Error: ' + exception); + }); +} + +function getScriptsInOrder(scripts, callback) { + if (scripts.length === 0) { + callback(); + return; + } + getScript(scripts[0], function() { + getScriptsInOrder(scripts.slice(1), callback); + }); +} + +function loadScripts(urls, callback) { + if( 'function' === typeof importScripts ) { + importScripts.apply(null, urls); + callback(); + } else { + getScriptsInOrder(urls, callback); + } +} + +function onJSONLoaded () { + data = JSON.parse(this.responseText); + var scriptsToLoad = ['lunr.js']; + if (data.config && data.config.lang && data.config.lang.length) { + lang = data.config.lang; + } + if (lang.length > 1 || lang[0] !== "en") { + scriptsToLoad.push('lunr.stemmer.support.js'); + if (lang.length > 1) { + scriptsToLoad.push('lunr.multi.js'); + } + if (lang.includes("ja") || lang.includes("jp")) { + scriptsToLoad.push('tinyseg.js'); + } + for (var i=0; i < lang.length; i++) { + if (lang[i] != 'en') { + scriptsToLoad.push(['lunr', lang[i], 'js'].join('.')); + } + } + } + loadScripts(scriptsToLoad, onScriptsLoaded); +} + +function onScriptsLoaded () { + console.log('All search scripts loaded, building Lunr index...'); + if (data.config && data.config.separator && data.config.separator.length) { + lunr.tokenizer.separator = new RegExp(data.config.separator); + } + + if (data.index) { + index = lunr.Index.load(data.index); + data.docs.forEach(function (doc) { + documents[doc.location] = doc; + }); + console.log('Lunr pre-built index loaded, search ready'); + } else { + index = lunr(function () { + if (lang.length === 1 && lang[0] !== "en" && lunr[lang[0]]) { + this.use(lunr[lang[0]]); + } else if (lang.length > 1) { 
+ this.use(lunr.multiLanguage.apply(null, lang)); // spread operator not supported in all browsers: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Spread_operator#Browser_compatibility + } + this.field('title'); + this.field('text'); + this.ref('location'); + + for (var i=0; i < data.docs.length; i++) { + var doc = data.docs[i]; + this.add(doc); + documents[doc.location] = doc; + } + }); + console.log('Lunr index built, search ready'); + } + allowSearch = true; + postMessage({config: data.config}); + postMessage({allowSearch: allowSearch}); +} + +function init () { + var oReq = new XMLHttpRequest(); + oReq.addEventListener("load", onJSONLoaded); + var index_path = base_path + '/search_index.json'; + if( 'function' === typeof importScripts ){ + index_path = 'search_index.json'; + } + oReq.open("GET", index_path); + oReq.send(); +} + +function search (query) { + if (!allowSearch) { + console.error('Assets for search still loading'); + return; + } + + var resultDocuments = []; + var results = index.search(query); + for (var i=0; i < results.length; i++){ + var result = results[i]; + doc = documents[result.ref]; + doc.summary = doc.text.substring(0, 200); + resultDocuments.push(doc); + } + return resultDocuments; +} + +if( 'function' === typeof importScripts ) { + onmessage = function (e) { + if (e.data.init) { + init(); + } else if (e.data.query) { + postMessage({ results: search(e.data.query) }); + } else { + console.error("Worker - Unrecognized message: " + e); + } + }; +} diff --git a/site/sitemap.xml b/site/sitemap.xml new file mode 100644 index 0000000..067e6dc --- /dev/null +++ b/site/sitemap.xml @@ -0,0 +1,38 @@ + +Default theme. Plain white background.
+ +